From 086a3e107895ab119a950608597d4ca9e1984d7e Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 14:29:06 +0800 Subject: [PATCH 1/7] Scaffold elfuse oci subcommand and image reference parser Lays the first slice of Phase 1 from issue #31: the elfuse oci subcommand surface and a self-contained OCI image reference parser. No registry, store, or unpack code lands here; this is the routing and parsing scaffold that every later piece depends on. src/main.c routes argv[1] == "oci" to oci_cli_main before the Hypervisor.framework setup runs, so image distribution never has to satisfy the host DC ZVA assertion or the HVF entitlement check. The existing arg parser, --help, --version, --fork-child, and guest execution paths are otherwise untouched. src/oci/cli.c implements pull, inspect, prune, and list dispatch. inspect parses a reference and prints the canonical form along with the registry, repository, tag, and digest fields, which proves the end-to-end wiring. The remaining subcommands return rc=2 with an explicit "not implemented yet" message rather than crashing or silently succeeding so users get a stable surface to script against. src/oci/ref.c implements the de-facto containerd/docker reference grammar: reference := name [":" tag] ["@" digest] name := [domain "/"] path domain := first slash component containing "." or ":" or equal to "localhost" path := component ("/" component)* component := [a-z0-9]+ ((["._-"] | "__") [a-z0-9]+)* tag := [A-Za-z0-9_] [A-Za-z0-9_.-]{0,127} digest := ("sha256" | "sha512") ":" lowercase-hex Defaults match Docker conventions: missing registry becomes docker.io, single-segment paths under docker.io pick up the library/ prefix, and missing tag/digest defaults the tag to latest. A digest-only reference leaves tag NULL so the canonical form does not fabricate a tag the user never wrote. 
Digest hex is required to be lowercase because the local content-addressable store will key off the canonical digest string and uppercase encodings would otherwise cause silent dedup misses. memrchr is GNU-only and Darwin libc does not ship it, so a small memrchr_local helper handles the rightmost-slash search the tag detector needs. The looks_like_domain helper compares localhost as a 9-byte literal (the earlier draft had a length bug here that the unit tests caught). tests/test-oci-ref.c is a native macOS test program (not cross-compiled, no Hypervisor.framework, no codesign) that links directly against src/oci/ref.c. It runs 14 happy-path cases covering Docker defaults, registry detection, port handling, sha256 and sha512 digests, tag+digest pinning, and every separator variant in the component grammar, plus 20 error cases covering empty input, NULL input, uppercase, malformed digests, double @, empty tag/digest suffixes, length limits, and structural validation. All 34 cases pass. mk/config.mk adds tests/test-oci-ref.c to NATIVE_TESTS so the cross-compile pattern rule does not pick it up. Makefile adds the link rule for build/test-oci-ref (no codesign because there is no HVF dependency). mk/tests.mk exposes test-oci-ref as a phony target and runs it as the last stage of make check, alongside the existing proctitle, busybox, sysroot, and timeout-disable validations. 
--- Makefile | 10 +- mk/config.mk | 2 +- mk/tests.mk | 8 +- src/main.c | 9 + src/oci/cli.c | 93 ++++++++++ src/oci/cli.h | 18 ++ src/oci/ref.c | 429 +++++++++++++++++++++++++++++++++++++++++++ src/oci/ref.h | 59 ++++++ tests/test-oci-ref.c | 282 ++++++++++++++++++++++++++++ 9 files changed, 907 insertions(+), 3 deletions(-) create mode 100644 src/oci/cli.c create mode 100644 src/oci/cli.h create mode 100644 src/oci/ref.c create mode 100644 src/oci/ref.h create mode 100644 tests/test-oci-ref.c diff --git a/Makefile b/Makefile index 45a921f..9187bed 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,9 @@ SRCS := \ debug/gdbstub.c \ debug/gdbstub-reg.c \ debug/gdbstub-rsp.c \ - debug/log.c + debug/log.c \ + oci/ref.c \ + oci/cli.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) @@ -128,6 +130,12 @@ $(BUILD_DIR)/test-multi-vcpu: $(BUILD_DIR)/test-multi-vcpu.o | $(BUILD_DIR) $(BUILD_DIR)/test-rwx: $(BUILD_DIR)/test-rwx.o | $(BUILD_DIR) $(call link-and-sign,$@,$<) +## Build the OCI reference parser unit test (native macOS binary). +## Pure C, no HVF, no codesign required. +$(BUILD_DIR)/test-oci-ref: $(BUILD_DIR)/test-oci-ref.o $(BUILD_DIR)/oci/ref.o | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. 
diff --git a/mk/config.mk b/mk/config.mk index 0c18aa9..e0a3dcb 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -15,7 +15,7 @@ ifeq ($(origin GUEST_TEST_BINARIES), undefined) endif # Exclude native macOS test files from cross-compilation -NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c +NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index 844b16c..01cf141 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -5,7 +5,7 @@ test-dynamic test-dynamic-coreutils test-glibc-dynamic \ test-glibc-coreutils test-perf \ test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \ - test-full test-multi-vcpu test-rwx test-sysroot-rename \ + test-full test-multi-vcpu test-rwx test-oci-ref test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ test-sysroot-procfs-exec test-timeout-disable \ @@ -31,6 +31,12 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-sysroot-procfs-exec @printf "\n$(BLUE)━━━ timeout=0 validation ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-timeout-disable + @printf "\n$(BLUE)━━━ OCI reference parser unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-ref + +## Run the OCI image reference parser unit tests (native, no HVF) +test-oci-ref: $(BUILD_DIR)/test-oci-ref + @$(BUILD_DIR)/test-oci-ref test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ diff --git a/src/main.c b/src/main.c index cebf591..3b01652 100644 --- a/src/main.c +++ b/src/main.c @@ -31,6 +31,8 @@ #include "core/guest.h" #include "core/sysroot.h" +#include "oci/cli.h" + #include "runtime/forkipc.h" #include "runtime/proctitle.h" @@ -127,6 +129,13 @@ int main(int argc, char **argv) bool gdb_stop_on_entry = 
false; int arg_start = 1; + /* `elfuse oci ...` is a self-contained CLI subcommand: image distribution + * never touches Hypervisor.framework, so dispatch before any guest setup + * to avoid host-DC-ZVA / entitlement checks the user never asked for. + */ + if (argc > 1 && !strcmp(argv[1], "oci")) + return oci_cli_main(argc - 1, argv + 1); + /* --help and --version do not require an ELF path. */ if (argc > 1) { if (!strcmp(argv[1], "--version") || !strcmp(argv[1], "-V")) { diff --git a/src/oci/cli.c b/src/oci/cli.c new file mode 100644 index 0000000..314917d --- /dev/null +++ b/src/oci/cli.c @@ -0,0 +1,93 @@ +/* `elfuse oci` subcommand dispatch + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Phase 1 only wires the inspect path through the reference parser. pull, + * prune, and list intentionally exit 2 with an explanatory message so early + * users get a stable surface to script against without touching code that + * does not yet exist. + */ + +#include "cli.h" + +#include +#include +#include + +#include "ref.h" + +static int print_usage(FILE *out) +{ + fputs( + "usage: elfuse oci [args]\n" + "\n" + "Subcommands:\n" + " pull Download an image into the local store\n" + " inspect Show the canonical reference and parsed fields\n" + " prune Remove unreferenced blobs from the local store\n" + " list List images in the local store\n" + "\n" + "Refs follow the docker/containerd grammar:\n" + " alpine, alpine:3.20, user/repo, ghcr.io/owner/img:tag,\n" + " repo@sha256:, repo:tag@sha256:\n", + out); + return out == stderr ? 2 : 0; +} + +static int cmd_inspect(int argc, char **argv) +{ + if (argc != 2) { + fputs("error: inspect takes exactly one reference argument\n", stderr); + return 2; + } + oci_ref_t ref; + const char *err = NULL; + if (oci_ref_parse(argv[1], &ref, &err) < 0) { + fprintf(stderr, "error: %s\n", err ? 
err : "invalid reference"); + return 1; + } + char *canonical = oci_ref_canonical(&ref); + if (!canonical) { + fputs("error: out of memory rendering canonical reference\n", stderr); + oci_ref_free(&ref); + return 1; + } + printf("canonical: %s\n", canonical); + printf("registry: %s\n", ref.registry); + printf("repository: %s\n", ref.repository); + printf("tag: %s\n", ref.tag ? ref.tag : "(none)"); + printf("digest: %s\n", ref.digest ? ref.digest : "(none)"); + free(canonical); + oci_ref_free(&ref); + return 0; +} + +static int cmd_not_implemented(const char *name) +{ + fprintf(stderr, + "error: 'oci %s' is not implemented yet (see issue #31 Phase 1)\n", + name); + return 2; +} + +int oci_cli_main(int argc, char **argv) +{ + if (argc < 2) + return print_usage(stderr); + + const char *sub = argv[1]; + if (!strcmp(sub, "-h") || !strcmp(sub, "--help") || !strcmp(sub, "help")) + return print_usage(stdout); + if (!strcmp(sub, "inspect")) + return cmd_inspect(argc - 1, argv + 1); + if (!strcmp(sub, "pull")) + return cmd_not_implemented("pull"); + if (!strcmp(sub, "prune")) + return cmd_not_implemented("prune"); + if (!strcmp(sub, "list") || !strcmp(sub, "ls")) + return cmd_not_implemented("list"); + + fprintf(stderr, "error: unknown oci subcommand: %s\n", sub); + return print_usage(stderr); +} diff --git a/src/oci/cli.h b/src/oci/cli.h new file mode 100644 index 0000000..781efd4 --- /dev/null +++ b/src/oci/cli.h @@ -0,0 +1,18 @@ +/* `elfuse oci` subcommand dispatch + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Sits on the side of the main argv parser: when argv[1] == "oci" the rest + * of the command line is forwarded here. Subcommands are pull, inspect, + * prune, and list. Only inspect parses a reference today; the others return + * a deterministic "not yet implemented" exit so users can discover the + * surface without crashes. + */ + +#pragma once + +/* argc/argv are the slice starting at "oci" (i.e. argv[0] == "oci"). 
Returns + * a process exit code suitable for main() to return directly. + */ +int oci_cli_main(int argc, char **argv); diff --git a/src/oci/ref.c b/src/oci/ref.c new file mode 100644 index 0000000..1f49321 --- /dev/null +++ b/src/oci/ref.c @@ -0,0 +1,429 @@ +/* OCI image reference parser + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * See ref.h for the grammar and design notes. The parser is split into: + * 1. find the optional @digest suffix and validate it + * 2. find the optional :tag suffix on the remainder + * 3. split the rest into registry vs path using the containerd domain rule + * 4. apply Docker defaults (docker.io, library/, latest) + * 5. validate every component against the OCI character class rules + */ + +#include "ref.h" + +#include +#include +#include +#include +#include +#include + +#define DEFAULT_REGISTRY "docker.io" +#define DEFAULT_LIBRARY_NAMESPACE "library" +#define DEFAULT_TAG "latest" + +#define MAX_REFERENCE_LEN 4096 +#define MAX_TAG_LEN 128 + +static char *strndup_local(const char *src, size_t n) +{ + char *dst = (char *) malloc(n + 1); + if (!dst) + return NULL; + memcpy(dst, src, n); + dst[n] = '\0'; + return dst; +} + +static void set_err(const char **slot, const char *msg) +{ + if (slot) + *slot = msg; +} + +static bool is_lower_alnum(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); +} + +static bool is_path_separator(char c) +{ + return c == '.' || c == '_' || c == '-'; +} + +/* Validate one path component against [a-z0-9]+ (([._-]|__) [a-z0-9]+)*. + * Empty components and uppercase letters are rejected. + */ +static bool valid_path_component(const char *s, size_t len) +{ + if (len == 0) + return false; + if (!is_lower_alnum(s[0]) || !is_lower_alnum(s[len - 1])) + return false; + + size_t i = 0; + while (i < len) { + if (is_lower_alnum(s[i])) { + i++; + continue; + } + /* Separator run: a single '.', '-', '_', or exactly "__". 
Anything + * else is rejected so paths like "a..b" or "a___b" do not slip + * through. + */ + if (s[i] == '_' && i + 1 < len && s[i + 1] == '_') { + i += 2; + } else if (is_path_separator(s[i])) { + i++; + } else { + return false; + } + if (i >= len || !is_lower_alnum(s[i])) + return false; + } + return true; +} + +/* Validate a multi-component path (components separated by '/'). */ +static bool valid_repository_path(const char *s, size_t len) +{ + if (len == 0) + return false; + size_t start = 0; + for (size_t i = 0; i < len; i++) { + if (s[i] == '/') { + if (!valid_path_component(s + start, i - start)) + return false; + start = i + 1; + } + } + return valid_path_component(s + start, len - start); +} + +/* Domain detection per containerd: a leading slash component is a registry + * only when it contains '.' or ':', or when it is exactly "localhost". + */ +static bool looks_like_domain(const char *s, size_t len) +{ + if (len == 9 && memcmp(s, "localhost", 9) == 0) + return true; + for (size_t i = 0; i < len; i++) { + if (s[i] == '.' || s[i] == ':') + return true; + } + return false; +} + +/* Portable rightmost-match: Darwin libc does not ship memrchr. */ +static const char *memrchr_local(const char *s, int c, size_t n) +{ + while (n > 0) { + n--; + if ((unsigned char) s[n] == (unsigned char) c) + return s + n; + } + return NULL; +} + +/* Validate a registry host[:port]. The host portion is permissive (DNS + * label rules plus IPv6 brackets are not enforced) but uppercase letters + * are accepted because hostnames are case-insensitive. The optional port + * suffix must be a 1..5 digit decimal number. + */ +static bool valid_registry(const char *s, size_t len) +{ + if (len == 0) + return false; + /* Reject embedded whitespace or path separators outright. 
*/ + for (size_t i = 0; i < len; i++) { + unsigned char c = (unsigned char) s[i]; + if (c <= ' ' || c == '/' || c == '@') + return false; + } + /* If there is a ':' it must be followed by 1..5 decimal digits and must + * be the last colon (IPv6 in brackets is not yet supported). + */ + const char *colon = memchr(s, ':', len); + if (colon) { + size_t host_len = (size_t) (colon - s); + size_t port_len = len - host_len - 1; + if (host_len == 0 || port_len == 0 || port_len > 5) + return false; + for (size_t i = 0; i < port_len; i++) { + if (colon[1 + i] < '0' || colon[1 + i] > '9') + return false; + } + } + return true; +} + +static bool valid_tag(const char *s, size_t len) +{ + if (len == 0 || len > MAX_TAG_LEN) + return false; + /* First char: word character (letter, digit, underscore). */ + unsigned char c0 = (unsigned char) s[0]; + if (!isalnum(c0) && c0 != '_') + return false; + for (size_t i = 1; i < len; i++) { + unsigned char c = (unsigned char) s[i]; + if (!isalnum(c) && c != '_' && c != '.' && c != '-') + return false; + } + return true; +} + +static bool is_lower_hex(char c) +{ + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); +} + +/* Validate ":" with algo in {sha256, sha512}. The hex digits are + * required to be lowercase per the OCI image-spec descriptor canonicalisation + * rules; uppercase encodings would otherwise cause silent dedup misses in + * the local store. 
+ */ +static bool valid_digest(const char *s, size_t len, const char **err_msg) +{ + const char *colon = memchr(s, ':', len); + if (!colon) { + set_err(err_msg, "digest missing ':' separator"); + return false; + } + size_t algo_len = (size_t) (colon - s); + size_t hex_len = len - algo_len - 1; + + size_t expected_hex; + if (algo_len == 6 && memcmp(s, "sha256", 6) == 0) { + expected_hex = 64; + } else if (algo_len == 6 && memcmp(s, "sha512", 6) == 0) { + expected_hex = 128; + } else { + set_err(err_msg, + "digest algorithm must be sha256 or sha512"); + return false; + } + if (hex_len != expected_hex) { + set_err(err_msg, "digest hex length does not match algorithm"); + return false; + } + for (size_t i = 0; i < hex_len; i++) { + if (!is_lower_hex(colon[1 + i])) { + set_err(err_msg, "digest hex must be lowercase 0-9 a-f"); + return false; + } + } + return true; +} + +void oci_ref_free(oci_ref_t *ref) +{ + if (!ref) + return; + free(ref->registry); + free(ref->repository); + free(ref->tag); + free(ref->digest); + ref->registry = NULL; + ref->repository = NULL; + ref->tag = NULL; + ref->digest = NULL; +} + +int oci_ref_parse(const char *input, oci_ref_t *out, const char **err_msg) +{ + set_err(err_msg, NULL); + if (!out) + return -1; + memset(out, 0, sizeof(*out)); + + if (!input) { + set_err(err_msg, "reference is NULL"); + return -1; + } + size_t total = strlen(input); + if (total == 0) { + set_err(err_msg, "reference is empty"); + return -1; + } + if (total > MAX_REFERENCE_LEN) { + set_err(err_msg, "reference exceeds 4096 characters"); + return -1; + } + + /* Step 1: split off "@digest" (rightmost '@' wins because '@' cannot + * legally appear elsewhere in a well-formed reference). + */ + const char *digest_start = NULL; + size_t digest_len = 0; + const char *at = memchr(input, '@', total); + if (at) { + /* Reject multiple '@' separators outright. 
*/ + const char *second = memchr(at + 1, '@', total - (size_t) (at + 1 - input)); + if (second) { + set_err(err_msg, "reference contains multiple '@' separators"); + return -1; + } + digest_start = at + 1; + digest_len = total - (size_t) (digest_start - input); + if (digest_len == 0) { + set_err(err_msg, "digest is empty after '@'"); + return -1; + } + if (!valid_digest(digest_start, digest_len, err_msg)) + return -1; + total = (size_t) (at - input); + if (total == 0) { + set_err(err_msg, "reference has no name before '@'"); + return -1; + } + } + + /* Step 2: peel off ":tag" if present. The tag separator is the rightmost + * ':' that follows the last '/' (a colon before any '/' belongs to the + * registry's port). + */ + const char *tag_start = NULL; + size_t tag_len = 0; + size_t name_len = total; + const char *last_slash = memrchr_local(input, '/', total); + const char *scan_from = last_slash ? last_slash + 1 : input; + const char *scan_end = input + total; + const char *tag_colon = memchr(scan_from, ':', + (size_t) (scan_end - scan_from)); + if (tag_colon) { + tag_start = tag_colon + 1; + tag_len = total - (size_t) (tag_start - input); + if (tag_len == 0) { + set_err(err_msg, "tag is empty after ':'"); + return -1; + } + if (!valid_tag(tag_start, tag_len)) { + set_err(err_msg, "tag has invalid characters or length"); + return -1; + } + name_len = (size_t) (tag_colon - input); + if (name_len == 0) { + set_err(err_msg, "reference has no name before ':'"); + return -1; + } + } + + /* Step 3: split name into [registry "/"] path. 
*/ + const char *registry_start = NULL; + size_t registry_len = 0; + const char *path_start = input; + size_t path_len = name_len; + + const char *first_slash = memchr(input, '/', name_len); + if (first_slash) { + size_t head_len = (size_t) (first_slash - input); + if (looks_like_domain(input, head_len)) { + registry_start = input; + registry_len = head_len; + path_start = first_slash + 1; + path_len = name_len - head_len - 1; + if (path_len == 0) { + set_err(err_msg, "reference has no repository after registry"); + return -1; + } + } + } + + /* Step 4: validate path components and detect single-segment defaults. */ + if (!valid_repository_path(path_start, path_len)) { + set_err(err_msg, + "repository path has invalid component (lowercase letters," + " digits, '.', '_', '-' only)"); + return -1; + } + + if (registry_len > 0 && !valid_registry(registry_start, registry_len)) { + set_err(err_msg, "registry host has invalid characters"); + return -1; + } + + /* Step 5: materialise the canonical fields. */ + out->registry = registry_len > 0 + ? 
strndup_local(registry_start, registry_len) + : strdup(DEFAULT_REGISTRY); + if (!out->registry) + goto oom; + + bool needs_library_prefix = + strcmp(out->registry, DEFAULT_REGISTRY) == 0 && + memchr(path_start, '/', path_len) == NULL; + if (needs_library_prefix) { + size_t prefix_len = strlen(DEFAULT_LIBRARY_NAMESPACE); + size_t total_len = prefix_len + 1 + path_len; + out->repository = (char *) malloc(total_len + 1); + if (!out->repository) + goto oom; + memcpy(out->repository, DEFAULT_LIBRARY_NAMESPACE, prefix_len); + out->repository[prefix_len] = '/'; + memcpy(out->repository + prefix_len + 1, path_start, path_len); + out->repository[total_len] = '\0'; + } else { + out->repository = strndup_local(path_start, path_len); + if (!out->repository) + goto oom; + } + + if (tag_len > 0) { + out->tag = strndup_local(tag_start, tag_len); + if (!out->tag) + goto oom; + } else if (digest_len == 0) { + out->tag = strdup(DEFAULT_TAG); + if (!out->tag) + goto oom; + } + + if (digest_len > 0) { + out->digest = strndup_local(digest_start, digest_len); + if (!out->digest) + goto oom; + } + + return 0; + +oom: + set_err(err_msg, "out of memory"); + oci_ref_free(out); + return -1; +} + +char *oci_ref_canonical(const oci_ref_t *ref) +{ + if (!ref || !ref->registry || !ref->repository) + return NULL; + size_t reg_len = strlen(ref->registry); + size_t repo_len = strlen(ref->repository); + size_t tag_len = ref->tag ? strlen(ref->tag) : 0; + size_t dig_len = ref->digest ? strlen(ref->digest) : 0; + size_t total = reg_len + 1 + repo_len + (tag_len ? tag_len + 1 : 0) + + (dig_len ? 
dig_len + 1 : 0) + 1; + char *buf = (char *) malloc(total); + if (!buf) + return NULL; + char *p = buf; + memcpy(p, ref->registry, reg_len); + p += reg_len; + *p++ = '/'; + memcpy(p, ref->repository, repo_len); + p += repo_len; + if (tag_len) { + *p++ = ':'; + memcpy(p, ref->tag, tag_len); + p += tag_len; + } + if (dig_len) { + *p++ = '@'; + memcpy(p, ref->digest, dig_len); + p += dig_len; + } + *p = '\0'; + return buf; +} diff --git a/src/oci/ref.h b/src/oci/ref.h new file mode 100644 index 0000000..dfd1885 --- /dev/null +++ b/src/oci/ref.h @@ -0,0 +1,59 @@ +/* Parse OCI image references (REGISTRY/REPO[:TAG][@DIGEST]) + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Implements the de-facto containerd/docker reference grammar so that user + * input like alpine, alpine:3.20, myuser/myrepo:tag, ghcr.io/owner/img:tag, + * or repo@sha256: resolves to a canonical (registry, repository, tag, + * digest) tuple. Defaults match Docker conventions: bare names land under + * docker.io/library/ with tag latest. + * + * Grammar (informal): + * + * reference := name [":" tag] ["@" digest] + * name := [domain "/"] path + * domain := first slash component containing "." or ":" or == "localhost" + * path := component ("/" component)* + * component := [a-z0-9]+ ((["._-"] | "__") [a-z0-9]+)* + * tag := [A-Za-z0-9_] [A-Za-z0-9_.-]{0,127} + * digest := ("sha256" | "sha512") ":" hex (lowercase hex) + * + * Domain detection follows containerd: the first slash-separated component + * is treated as a registry only when it carries a domain marker. Bare + * single-segment names (alpine) and two-segment names (user/repo) default + * to docker.io. Single-segment defaults additionally pick up the library/ + * prefix. + */ + +#pragma once + +typedef struct { + /* Registry hostname (and optional :port). Always non-NULL after parse. */ + char *registry; + /* Repository path with namespace, e.g. "library/alpine". Always non-NULL. 
*/ + char *repository; + /* Tag name. NULL when the reference is pinned by digest only. Defaults + * to "latest" when neither tag nor digest is present. + */ + char *tag; + /* Digest ":", or NULL. */ + char *digest; +} oci_ref_t; + +/* Parse input into out. Returns 0 on success or -1 on malformed input. On + * error, *err_msg (when err_msg != NULL) is set to a static description; the + * string must not be freed. On success the caller owns out and must call + * oci_ref_free. + */ +int oci_ref_parse(const char *input, oci_ref_t *out, const char **err_msg); + +/* Render a canonical "registry/repository[:tag][@digest]" string. Always + * heap-allocated; the caller frees. Returns NULL on allocation failure. + */ +char *oci_ref_canonical(const oci_ref_t *ref); + +/* Release any heap fields. Safe on a zero-initialised or partially populated + * struct; resets all fields to NULL. + */ +void oci_ref_free(oci_ref_t *ref); diff --git a/tests/test-oci-ref.c b/tests/test-oci-ref.c new file mode 100644 index 0000000..ff3b970 --- /dev/null +++ b/tests/test-oci-ref.c @@ -0,0 +1,282 @@ +/* OCI image reference parser unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Standalone native macOS test program (NOT a guest binary). Links directly + * against src/oci/ref.c so it does not depend on Hypervisor.framework and + * has no entitlement requirements. Each table-driven case exercises either + * a happy-path canonicalisation or a specific rejection reason. + * + * Build: see mk/tests.mk target test-oci-ref. 
+ * Run: build/test-oci-ref + */ + +#include +#include +#include + +#include "oci/ref.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int total = 0; +static int passed = 0; + +static void report_pass(const char *name) +{ + total++; + passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *detail) +{ + total++; + printf(" " RED "FAIL" RESET " %s: %s\n", name, detail ? detail : ""); +} + +/* Compare a parsed field against an expected value. NULL on either side is + * an exact match only when both are NULL. + */ +static int field_matches(const char *got, const char *want) +{ + if (!want) + return got == NULL; + return got != NULL && strcmp(got, want) == 0; +} + +struct happy_case { + const char *name; + const char *input; + const char *want_canonical; + const char *want_registry; + const char *want_repository; + const char *want_tag; /* NULL => expect ref.tag == NULL */ + const char *want_digest; /* NULL => expect ref.digest == NULL */ +}; + +static void run_happy(const struct happy_case *c) +{ + oci_ref_t ref; + const char *err = NULL; + if (oci_ref_parse(c->input, &ref, &err) != 0) { + char detail[256]; + snprintf(detail, sizeof(detail), + "parse failed unexpectedly: input=%s err=%s", + c->input, err ? err : "(null)"); + report_fail(c->name, detail); + return; + } + char *canonical = oci_ref_canonical(&ref); + int ok = canonical && strcmp(canonical, c->want_canonical) == 0 && + field_matches(ref.registry, c->want_registry) && + field_matches(ref.repository, c->want_repository) && + field_matches(ref.tag, c->want_tag) && + field_matches(ref.digest, c->want_digest); + if (ok) { + report_pass(c->name); + } else { + char detail[1024]; + snprintf(detail, sizeof(detail), + "input=%s canonical=%s registry=%s repository=%s " + "tag=%s digest=%s", + c->input, canonical ? canonical : "(null)", + ref.registry ? ref.registry : "(null)", + ref.repository ? 
ref.repository : "(null)", + ref.tag ? ref.tag : "(null)", + ref.digest ? ref.digest : "(null)"); + report_fail(c->name, detail); + } + free(canonical); + oci_ref_free(&ref); +} + +struct error_case { + const char *name; + const char *input; + const char *err_substring; /* fragment that must appear in the error */ +}; + +static void run_error(const struct error_case *c) +{ + oci_ref_t ref; + const char *err = NULL; + if (oci_ref_parse(c->input, &ref, &err) == 0) { + char *canonical = oci_ref_canonical(&ref); + char detail[256]; + snprintf(detail, sizeof(detail), + "expected rejection but parsed: input=%s canonical=%s", + c->input, canonical ? canonical : "(null)"); + free(canonical); + oci_ref_free(&ref); + report_fail(c->name, detail); + return; + } + if (!err || !strstr(err, c->err_substring)) { + char detail[256]; + snprintf(detail, sizeof(detail), + "input=%s: error %s did not contain %s", + c->input, err ? err : "(null)", c->err_substring); + report_fail(c->name, detail); + return; + } + /* Confirm parse leaves the struct safely freeable even on failure. */ + oci_ref_free(&ref); + report_pass(c->name); +} + +static const char SHA256_HEX[] = + "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; +static const char SHA512_HEX[] = + "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" + "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; + +int main(void) +{ + /* Happy cases. The canonical column is the output the rest of the + * codebase will key off, so verify it explicitly per case rather than + * reconstructing it in the test. 
+ */ + struct happy_case happy[] = { + {"bare repo defaults to docker.io/library and latest", + "alpine", + "docker.io/library/alpine:latest", + "docker.io", "library/alpine", "latest", NULL}, + {"tagged bare repo", + "alpine:3.20", + "docker.io/library/alpine:3.20", + "docker.io", "library/alpine", "3.20", NULL}, + {"two-segment repo skips library/ prefix", + "myuser/myrepo", + "docker.io/myuser/myrepo:latest", + "docker.io", "myuser/myrepo", "latest", NULL}, + {"registry with dot is detected", + "ghcr.io/owner/img:tag", + "ghcr.io/owner/img:tag", + "ghcr.io", "owner/img", "tag", NULL}, + {"localhost is detected as registry", + "localhost/repo:dev", + "localhost/repo:dev", + "localhost", "repo", "dev", NULL}, + {"registry with port preserves port", + "localhost:5000/repo:dev", + "localhost:5000/repo:dev", + "localhost:5000", "repo", "dev", NULL}, + {"digest-only ref leaves tag NULL (no latest default)", + "alpine@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io/library/alpine@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io", "library/alpine", NULL, "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"}, + {"tag+digest keeps both", + "alpine:3.20@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io/library/alpine:3.20@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io", "library/alpine", "3.20", "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"}, + {"sha512 digest is accepted", + "repo@sha512:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io/library/repo@sha512:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "docker.io", "library/repo", NULL, 
"sha512:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"}, + {"underscore separators inside path", + "library/foo_bar:tag", + "docker.io/library/foo_bar:tag", + "docker.io", "library/foo_bar", "tag", NULL}, + {"double-underscore separator", + "library/foo__bar:tag", + "docker.io/library/foo__bar:tag", + "docker.io", "library/foo__bar", "tag", NULL}, + {"hyphen and dot separators", + "library/foo-bar.baz:tag", + "docker.io/library/foo-bar.baz:tag", + "docker.io", "library/foo-bar.baz", "tag", NULL}, + {"deep nested path under custom registry", + "registry.example.com:443/team/sub/repo:1.2.3", + "registry.example.com:443/team/sub/repo:1.2.3", + "registry.example.com:443", "team/sub/repo", "1.2.3", NULL}, + {"tag containing dot, hyphen, underscore", + "alpine:1.2.3-rc1_build", + "docker.io/library/alpine:1.2.3-rc1_build", + "docker.io", "library/alpine", "1.2.3-rc1_build", NULL}, + }; + /* Suppress -Wunused-variable until the digest helper strings get used + * by future cases. They are referenced via SHA256_HEX/SHA512_HEX in + * comments above so the lengths stay in sync with the inline literals. 
+ */ + (void) SHA256_HEX; + (void) SHA512_HEX; + + printf("oci_ref_parse happy paths\n"); + for (size_t i = 0; i < sizeof(happy) / sizeof(happy[0]); i++) + run_happy(&happy[i]); + + struct error_case errors[] = { + {"empty reference rejected", "", "empty"}, + {"NULL-input handled (substituted with empty string)", "", "empty"}, + {"uppercase in path rejected", + "Alpine", "invalid component"}, + {"trailing colon rejected", + "alpine:", "tag is empty"}, + {"trailing at sign rejected", + "alpine@", "digest is empty"}, + {"double at sign rejected", + "a@b@c", "multiple '@'"}, + {"unknown digest algorithm rejected", + "alpine@md5:0123456789abcdef0123456789abcdef", + "must be sha256 or sha512"}, + {"short sha256 digest rejected", + "alpine@sha256:cafe", "hex length"}, + {"uppercase digest hex rejected", + "alpine@sha256:ABCDEF0123456789abcdef0123456789abcdef0123456789abcdef0123456789", + "lowercase"}, + {"path component starting with separator", + "library/.foo:tag", "invalid component"}, + {"path component ending with separator", + "library/foo-:tag", "invalid component"}, + {"triple-dot separator inside component", + "library/foo...bar:tag", "invalid component"}, + {"empty path after registry", + "ghcr.io/", "no repository"}, + {"reference with no name before '@'", + "@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "no name"}, + {"reference with no name before ':'", + ":tag", "no name"}, + {"tag too long (129 chars) rejected", + "alpine:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "invalid characters or length"}, + {"tag starting with dot rejected", + "alpine:.bad", "invalid characters"}, + {"port too long rejected", + "host:123456/repo", "invalid characters"}, + }; + + printf("oci_ref_parse error paths\n"); + for (size_t i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) + run_error(&errors[i]); + + /* NULL input must not crash. 
*/ + { + oci_ref_t ref; + const char *err = NULL; + if (oci_ref_parse(NULL, &ref, &err) == 0) { + report_fail("NULL input rejected without crash", + "parse returned success"); + oci_ref_free(&ref); + } else if (!err || !strstr(err, "NULL")) { + report_fail("NULL input rejected without crash", + err ? err : "(null err)"); + } else { + report_pass("NULL input rejected without crash"); + } + } + + /* oci_ref_free on a zero-init struct must be safe. */ + { + oci_ref_t ref = {0}; + oci_ref_free(&ref); + report_pass("free on zero-init ref is safe"); + } + + printf("\nResults: %d/%d passed\n", passed, total); + return passed == total ? 0 : 1; +} From 43a3d387109a54108c3f88c3573a33c7556f62c3 Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 14:46:44 +0800 Subject: [PATCH 2/7] Add OCI content-addressable blob store and SHA-256 digester Second slice of Phase 1 from issue #31. Lands the on-disk storage substrate that the upcoming registry client will spill manifests, configs, and layers into. No HTTP, no unpack, no CLI surface yet; this slice is intentionally a pure library plus offline unit tests so the storage semantics can be audited without standing up a network. src/oci/digest.{c,h} wraps CommonCrypto SHA-256 and SHA-512 in a streaming digester so multi-gigabyte layers can be hashed without buffering. Calls into CommonCrypto are clamped to 1 GiB chunks because CC_LONG is 32-bit and OCI layers can legitimately exceed that. Hex output is lowercase to match the reference parser (src/oci/ref.c); the OCI image reference grammar already rejects uppercase digest hex, so the entire pipeline -- parser, manifest fetcher, local store -- shares one canonical encoding and cannot silently miss a dedup match. A separate one-shot helper, hex validator, and ":" parser sit on top of the same streaming primitive. src/oci/blob-store.{c,h} is the content-addressable store. 
Layout matches the OCI image-layout convention: <root>/blobs/<algo>/<hex> for committed blobs plus <root>/tmp/blob-<pid>-<seq>-XXXXXX for the in-flight staging file. mkstemp supplies global uniqueness; an in-process counter is added to the template so failures of the rand pool cannot defeat in-process disambiguation. The commit path hashes streamed bytes, fsyncs the staging file, and uses link(2) rather than rename(2) to publish the final inode. link returning EEXIST is the dedup hit signal: two writers racing on the same digest both unlink their staging files and report success, because the content is by definition identical when the digest matched. Digest mismatch returns -1 with errno EINVAL and unlinks the staging file, so an interrupted or hostile pull never leaves a visible-complete blob behind. The abort path takes the same cleanup. STORE_PATH_MAX is set comfortably above PATH_MAX so snprintf truncation cannot silently corrupt a path; callers passing smaller buffers still detect overflow via the return value. Per oci-roadmap.md Q1, the store will eventually sit on a case-sensitive APFS sparse volume managed by elfuse, but the volume bootstrap is its own later slice. For now the store API takes a plain directory path; the same API survives the volume migration unchanged. tests/test-oci-digest.c exercises 25 cases: NIST FIPS-180-4 vectors (empty, "abc", 56-byte, one-million-'a') for both SHA-256 and SHA-512, the same one-million-'a' streamed in 4 KiB and 17-byte chunks to lock down the chunking loop, hex validator boundary cases, and every "<algo>:<hex>" parse rejection (missing colon, unknown algorithm, short hex, uppercase hex, NULL input). NULL and zero-length updates must be safe and must not perturb the running state. 
tests/test-oci-blob-store.c drives 14 cases inside an mkdtemp scratch directory: layout creation, idempotent reopen, path() formatting, one-shot put + has() round-trip, dedup commit leaves the same inode, digest mismatch is rejected with EINVAL and tmp/ stays empty, streaming writer over multiple chunks, abort leaves no leftover, and close + reopen still sees the committed blob (issue #31 DoD: "store survives restart"). dir_is_empty / path_is_dir / path_is_file helpers keep the assertions terse. Makefile adds oci/digest.c and oci/blob-store.c to SRCS, plus the two new native-test link rules. mk/config.mk extends NATIVE_TESTS so the cross-compile pattern rule does not pick the new tests up. mk/tests.mk exposes test-oci-digest and test-oci-blob-store as phony targets and runs them as the final two stages of make check, beside the existing test-oci-ref stage. All 39 (25 + 14) new assertions pass; the rest of make check stays green (unit suite 81 passed / 0 failed, busybox, proctitle, procfs-exec, timeout-disable, OCI-ref 34/34). 
--- Makefile | 14 +- mk/config.mk | 3 +- mk/tests.mk | 16 +- src/oci/blob-store.c | 399 ++++++++++++++++++++++++++++++++++++ src/oci/blob-store.h | 99 +++++++++ src/oci/digest.c | 207 +++++++++++++++++++ src/oci/digest.h | 92 +++++++++ tests/test-oci-blob-store.c | 363 ++++++++++++++++++++++++++++++++ tests/test-oci-digest.c | 296 ++++++++++++++++++++++++++ 9 files changed, 1486 insertions(+), 3 deletions(-) create mode 100644 src/oci/blob-store.c create mode 100644 src/oci/blob-store.h create mode 100644 src/oci/digest.c create mode 100644 src/oci/digest.h create mode 100644 tests/test-oci-blob-store.c create mode 100644 tests/test-oci-digest.c diff --git a/Makefile b/Makefile index 9187bed..066ab36 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,9 @@ SRCS := \ debug/gdbstub-rsp.c \ debug/log.c \ oci/ref.c \ - oci/cli.c + oci/cli.c \ + oci/digest.c \ + oci/blob-store.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) @@ -136,6 +138,16 @@ $(BUILD_DIR)/test-oci-ref: $(BUILD_DIR)/test-oci-ref.o $(BUILD_DIR)/oci/ref.o | @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ +## Build the OCI digest unit test (native macOS binary). Pure C, no HVF. +$(BUILD_DIR)/test-oci-digest: $(BUILD_DIR)/test-oci-digest.o $(BUILD_DIR)/oci/digest.o | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + +## Build the OCI blob store unit test (native macOS binary). Pure C, no HVF. +$(BUILD_DIR)/test-oci-blob-store: $(BUILD_DIR)/test-oci-blob-store.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. 
diff --git a/mk/config.mk b/mk/config.mk index e0a3dcb..9b7067f 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -15,7 +15,8 @@ ifeq ($(origin GUEST_TEST_BINARIES), undefined) endif # Exclude native macOS test files from cross-compilation -NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c +NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c \ + tests/test-oci-digest.c tests/test-oci-blob-store.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index 01cf141..fc2f935 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -5,7 +5,9 @@ test-dynamic test-dynamic-coreutils test-glibc-dynamic \ test-glibc-coreutils test-perf \ test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \ - test-full test-multi-vcpu test-rwx test-oci-ref test-sysroot-rename \ + test-full test-multi-vcpu test-rwx \ + test-oci-ref test-oci-digest test-oci-blob-store \ + test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ test-sysroot-procfs-exec test-timeout-disable \ @@ -33,11 +35,23 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-timeout-disable @printf "\n$(BLUE)━━━ OCI reference parser unit tests ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-oci-ref + @printf "\n$(BLUE)━━━ OCI digest unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-digest + @printf "\n$(BLUE)━━━ OCI blob store unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-blob-store ## Run the OCI image reference parser unit tests (native, no HVF) test-oci-ref: $(BUILD_DIR)/test-oci-ref @$(BUILD_DIR)/test-oci-ref +## Run the OCI digest unit tests (native, no HVF) +test-oci-digest: $(BUILD_DIR)/test-oci-digest + @$(BUILD_DIR)/test-oci-digest + +## Run the OCI blob store unit tests (native, no HVF) 
+test-oci-blob-store: $(BUILD_DIR)/test-oci-blob-store + @$(BUILD_DIR)/test-oci-blob-store + test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ trap 'rm -rf "$$tmpdir"; rm -f /tmp/elfuse-sysroot-rename-dst.txt' EXIT; \ diff --git a/src/oci/blob-store.c b/src/oci/blob-store.c new file mode 100644 index 0000000..cf40b4a --- /dev/null +++ b/src/oci/blob-store.c @@ -0,0 +1,399 @@ +/* Content-addressable blob store for OCI image data + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The commit path uses link(2) rather than rename(2) so that a second writer + * racing on the same digest cannot silently overwrite a blob that another + * process already finalized. link returning EEXIST is treated as a dedup + * hit; both clients then unlink their staging file and report success. This + * matches the content-addressable invariant: identical bytes map to one + * inode, regardless of how many concurrent writers raced to produce them. + */ + +#include "blob-store.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "digest.h" + +/* Largest path the store will materialize. Comfortably above PATH_MAX so + * snprintf truncation never silently corrupts a path; callers that pass an + * out_size smaller than this can still recover via the returned length. + */ +#define STORE_PATH_MAX 4096 + +struct oci_blob_store { + char *root; +}; + +struct oci_blob_writer { + oci_blob_store_t *store; + oci_digest_algo_t algo; + char expected_hex[OCI_DIGEST_HEX_MAX + 1]; + char tmp_path[STORE_PATH_MAX]; + int fd; + oci_digester_t *digester; + bool failed; +}; + +static int mkdir_one(const char *path) +{ + if (mkdir(path, 0755) == 0) + return 0; + if (errno == EEXIST) { + struct stat st; + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) + return 0; + errno = ENOTDIR; + return -1; + } + return -1; +} + +/* Create every directory along path. 
Walks component by component so that a + * missing intermediate directory does not abort the whole open. path must + * fit in STORE_PATH_MAX; the caller is responsible for upstream length + * checks (only internal call sites build these paths from store->root plus + * fixed suffixes, all of which stay well under the limit). + */ +static int mkdir_p(const char *path) +{ + char buf[STORE_PATH_MAX]; + size_t len = strlen(path); + if (len == 0 || len >= sizeof(buf)) { + errno = ENAMETOOLONG; + return -1; + } + memcpy(buf, path, len + 1); + + for (size_t i = 1; i < len; i++) { + if (buf[i] != '/') + continue; + buf[i] = '\0'; + if (mkdir_one(buf) < 0) + return -1; + buf[i] = '/'; + } + return mkdir_one(buf); +} + +static int join2(char *out, size_t out_size, const char *a, const char *b) +{ + int n = snprintf(out, out_size, "%s/%s", a, b); + if (n < 0 || (size_t) n >= out_size) { + errno = ENAMETOOLONG; + return -1; + } + return n; +} + +static int ensure_layout(const char *root) +{ + char path[STORE_PATH_MAX]; + if (mkdir_p(root) < 0) + return -1; + if (join2(path, sizeof(path), root, "blobs") < 0 || mkdir_one(path) < 0) + return -1; + if (join2(path, sizeof(path), root, "tmp") < 0 || mkdir_one(path) < 0) + return -1; + + static const char *const algos[] = {"sha256", "sha512"}; + for (size_t i = 0; i < sizeof(algos) / sizeof(algos[0]); i++) { + int n = snprintf(path, sizeof(path), "%s/blobs/%s", root, algos[i]); + if (n < 0 || (size_t) n >= sizeof(path)) { + errno = ENAMETOOLONG; + return -1; + } + if (mkdir_one(path) < 0) + return -1; + } + return 0; +} + +oci_blob_store_t *oci_blob_store_open(const char *root) +{ + if (!root || !*root) { + errno = EINVAL; + return NULL; + } + if (ensure_layout(root) < 0) + return NULL; + + oci_blob_store_t *s = calloc(1, sizeof(*s)); + if (!s) + return NULL; + s->root = strdup(root); + if (!s->root) { + free(s); + return NULL; + } + return s; +} + +void oci_blob_store_close(oci_blob_store_t *s) +{ + if (!s) + return; + free(s->root); 
+ free(s); +} + +int oci_blob_store_path(const oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *hex, + char *out, + size_t out_size) +{ + if (!s || !out || out_size == 0) { + if (out && out_size) + out[0] = '\0'; + return -1; + } + const char *name = oci_digest_algo_name(algo); + if (!name || !oci_digest_hex_valid(algo, hex)) { + out[0] = '\0'; + return -1; + } + int n = snprintf(out, out_size, "%s/blobs/%s/%s", s->root, name, hex); + if (n < 0) { + out[0] = '\0'; + return -1; + } + return n; +} + +bool oci_blob_store_has(const oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *hex) +{ + char path[STORE_PATH_MAX]; + int n = oci_blob_store_path(s, algo, hex, path, sizeof(path)); + if (n < 0 || (size_t) n >= sizeof(path)) + return false; + struct stat st; + return stat(path, &st) == 0 && S_ISREG(st.st_mode); +} + +/* Monotonic counter used to disambiguate concurrent staging files within the + * same process. mkstemp itself supplies the global uniqueness via the random + * XXXXXX suffix; the counter is here only so that read-modify failures of + * the rand pool cannot defeat in-process uniqueness. 
+ */ +static unsigned long writer_seq(void) +{ + static unsigned long n = 0; + return __sync_add_and_fetch(&n, 1); +} + +oci_blob_writer_t *oci_blob_writer_begin(oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *expected_hex) +{ + if (!s || !oci_digest_hex_valid(algo, expected_hex)) { + errno = EINVAL; + return NULL; + } + + oci_blob_writer_t *w = calloc(1, sizeof(*w)); + if (!w) + return NULL; + w->store = s; + w->algo = algo; + memcpy(w->expected_hex, expected_hex, oci_digest_hex_len(algo) + 1); + w->fd = -1; + + int n = snprintf(w->tmp_path, sizeof(w->tmp_path), + "%s/tmp/blob-%ld-%lu-XXXXXX", + s->root, (long) getpid(), writer_seq()); + if (n < 0 || (size_t) n >= sizeof(w->tmp_path)) { + free(w); + errno = ENAMETOOLONG; + return NULL; + } + + int fd = mkstemp(w->tmp_path); + if (fd < 0) { + int saved = errno; + free(w); + errno = saved; + return NULL; + } + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + if (fchmod(fd, 0644) < 0) { + int saved = errno; + (void) close(fd); + (void) unlink(w->tmp_path); + free(w); + errno = saved; + return NULL; + } + w->fd = fd; + + w->digester = oci_digester_new(algo); + if (!w->digester) { + int saved = errno ? errno : ENOMEM; + (void) close(w->fd); + (void) unlink(w->tmp_path); + free(w); + errno = saved; + return NULL; + } + return w; +} + +bool oci_blob_writer_write(oci_blob_writer_t *w, const void *buf, size_t len) +{ + if (!w || w->failed || (!buf && len)) { + if (w) + w->failed = true; + errno = EINVAL; + return false; + } + const uint8_t *p = buf; + while (len > 0) { + ssize_t n = write(w->fd, p, len); + if (n < 0) { + if (errno == EINTR) + continue; + w->failed = true; + return false; + } + if (n == 0) { + w->failed = true; + errno = EIO; + return false; + } + oci_digester_update(w->digester, p, (size_t) n); + p += n; + len -= (size_t) n; + } + return true; +} + +/* Discard staging file, free fd and digester. Errno is preserved across the + * cleanup so the caller can return its own diagnostic. 
+ */ +static void writer_cleanup_fail(oci_blob_writer_t *w) +{ + int saved = errno; + if (w->fd >= 0) + (void) close(w->fd); + (void) unlink(w->tmp_path); + oci_digester_free(w->digester); + free(w); + errno = saved; +} + +int oci_blob_writer_commit(oci_blob_writer_t *w) +{ + if (!w) { + errno = EINVAL; + return -1; + } + if (w->failed) { + writer_cleanup_fail(w); + errno = EIO; + return -1; + } + + char got_hex[OCI_DIGEST_HEX_MAX + 1]; + if (oci_digester_finish_hex(w->digester, got_hex) == 0) { + writer_cleanup_fail(w); + errno = EIO; + return -1; + } + oci_digester_free(w->digester); + w->digester = NULL; + + if (strcmp(got_hex, w->expected_hex) != 0) { + if (w->fd >= 0) + (void) close(w->fd); + (void) unlink(w->tmp_path); + free(w); + errno = EINVAL; + return -1; + } + + if (fsync(w->fd) < 0) { + int saved = errno; + (void) close(w->fd); + (void) unlink(w->tmp_path); + free(w); + errno = saved; + return -1; + } + if (close(w->fd) < 0) { + int saved = errno; + w->fd = -1; + (void) unlink(w->tmp_path); + free(w); + errno = saved; + return -1; + } + w->fd = -1; + + char final_path[STORE_PATH_MAX]; + int n = oci_blob_store_path(w->store, w->algo, w->expected_hex, final_path, + sizeof(final_path)); + if (n < 0 || (size_t) n >= sizeof(final_path)) { + (void) unlink(w->tmp_path); + free(w); + errno = ENAMETOOLONG; + return -1; + } + + if (link(w->tmp_path, final_path) < 0) { + if (errno != EEXIST) { + int saved = errno; + (void) unlink(w->tmp_path); + free(w); + errno = saved; + return -1; + } + /* Dedup hit: another writer beat this one. Content is identical + * because the digest matched, so dropping the staging file is the + * correct action. 
+ */ + } + (void) unlink(w->tmp_path); + free(w); + return 0; +} + +void oci_blob_writer_abort(oci_blob_writer_t *w) +{ + if (!w) + return; + if (w->fd >= 0) + (void) close(w->fd); + (void) unlink(w->tmp_path); + oci_digester_free(w->digester); + free(w); +} + +int oci_blob_store_put_bytes(oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *expected_hex, + const void *buf, + size_t len) +{ + oci_blob_writer_t *w = oci_blob_writer_begin(s, algo, expected_hex); + if (!w) + return -1; + if (!oci_blob_writer_write(w, buf, len)) { + int saved = errno; + oci_blob_writer_abort(w); + errno = saved; + return -1; + } + return oci_blob_writer_commit(w); +} diff --git a/src/oci/blob-store.h b/src/oci/blob-store.h new file mode 100644 index 0000000..117e7f5 --- /dev/null +++ b/src/oci/blob-store.h @@ -0,0 +1,99 @@ +/* Content-addressable blob store for OCI image data + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Layout matches the OCI image-layout convention: + * + * /blobs// finalized blob, immutable + * /tmp/blob-- in-flight staging file + * + * Every blob is committed by writing the staging file, fsync'ing it, hashing + * the bytes as they stream through the writer, comparing the actual hex to + * the expected hex from the manifest descriptor, and then atomically renaming + * the staging file into its final blobs// slot. A digest mismatch + * unlinks the staging file before returning -1, so an interrupted or hostile + * pull leaves no visible-complete blob behind. Repeated commits of the same + * digest are dedup'd in place (final path already exists -> drop staging, + * report success). + * + * The store path is opaque to this module; the caller picks it. Phase 1 + * targets ~/Library/Application Support/elfuse/blobs/ on macOS; a later + * slice moves the root onto a case-sensitive APFS sparse volume (oci-roadmap + * Q1) but the store API does not change. 
+ */ + +#pragma once + +#include +#include + +#include "digest.h" + +typedef struct oci_blob_store oci_blob_store_t; +typedef struct oci_blob_writer oci_blob_writer_t; + +/* Open or create the store rooted at `root`. The directory tree (root, + * blobs/, tmp) is created with mode 0755 if missing. Returns NULL on + * failure with errno preserved. + */ +oci_blob_store_t *oci_blob_store_open(const char *root); + +/* Release the store handle. Does not delete on-disk state. Safe on NULL. */ +void oci_blob_store_close(oci_blob_store_t *s); + +/* Resolve the final on-disk path for algo:hex. Returns the number of bytes + * the full path occupies excluding the trailing NUL, or -1 if algo or hex + * is malformed. Always writes a NUL terminator when out_size > 0; if the + * full path does not fit, out is truncated but still NUL-terminated and the + * caller can detect overflow by comparing the return value to out_size. + */ +int oci_blob_store_path(const oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *hex, + char *out, + size_t out_size); + +/* True when blobs// exists as a regular file. */ +bool oci_blob_store_has(const oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *hex); + +/* Begin a streaming write keyed by the descriptor digest. The writer hashes + * payload bytes as they stream and verifies the result against expected_hex + * during commit. Returns NULL on failure with errno preserved. expected_hex + * must be lowercase and the correct length for algo. + */ +oci_blob_writer_t *oci_blob_writer_begin(oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *expected_hex); + +/* Append data to the staging file and the running digest. Returns true on + * success or false on a short write / I/O error with errno preserved. On + * failure the writer is left in a state where the only valid next call is + * oci_blob_writer_abort. 
+ */ +bool oci_blob_writer_write(oci_blob_writer_t *w, const void *buf, size_t len); + +/* Finalize the digest, fsync, verify against expected_hex, then atomically + * rename into place. On success returns 0 and releases the writer. On digest + * mismatch returns -1 with errno set to EINVAL. On I/O failure returns -1 + * with errno preserved. The staging file is always unlinked on failure so + * an aborted pull never leaves a visible-complete blob. + */ +int oci_blob_writer_commit(oci_blob_writer_t *w); + +/* Discard the staging file and release the writer. Always succeeds; safe on + * NULL. + */ +void oci_blob_writer_abort(oci_blob_writer_t *w); + +/* One-shot helper: write a memory buffer into the store. Returns 0 on + * success or -1 on failure (errno preserved); semantics match the streaming + * commit path including dedup and atomic rename. + */ +int oci_blob_store_put_bytes(oci_blob_store_t *s, + oci_digest_algo_t algo, + const char *expected_hex, + const void *buf, + size_t len); diff --git a/src/oci/digest.c b/src/oci/digest.c new file mode 100644 index 0000000..131cae9 --- /dev/null +++ b/src/oci/digest.c @@ -0,0 +1,207 @@ +/* Content digests for OCI image blobs + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "digest.h" + +#include +#include +#include +#include + +/* CC_LONG is 32-bit; clamp every update call so multi-gigabyte layers cannot + * overflow the CommonCrypto length argument silently. 1 GiB is well below the + * limit and large enough that the per-call overhead is negligible. 
+ */ +#define DIGESTER_CHUNK_MAX ((size_t) (1u << 30)) + +struct oci_digester { + oci_digest_algo_t algo; + union { + CC_SHA256_CTX sha256; + CC_SHA512_CTX sha512; + } ctx; +}; + +static const char HEX_LOWER[] = "0123456789abcdef"; + +static void bin_to_hex_lower(const uint8_t *bin, size_t bin_len, char *out) +{ + for (size_t i = 0; i < bin_len; i++) { + out[i * 2] = HEX_LOWER[(bin[i] >> 4) & 0xf]; + out[i * 2 + 1] = HEX_LOWER[bin[i] & 0xf]; + } + out[bin_len * 2] = '\0'; +} + +const char *oci_digest_algo_name(oci_digest_algo_t algo) +{ + switch (algo) { + case OCI_DIGEST_SHA256: + return "sha256"; + case OCI_DIGEST_SHA512: + return "sha512"; + } + return NULL; +} + +size_t oci_digest_hex_len(oci_digest_algo_t algo) +{ + switch (algo) { + case OCI_DIGEST_SHA256: + return OCI_DIGEST_SHA256_HEX_LEN; + case OCI_DIGEST_SHA512: + return OCI_DIGEST_SHA512_HEX_LEN; + } + return 0; +} + +bool oci_digest_algo_from_name(const char *name, oci_digest_algo_t *algo) +{ + if (!name || !algo) + return false; + if (!strcmp(name, "sha256")) { + *algo = OCI_DIGEST_SHA256; + return true; + } + if (!strcmp(name, "sha512")) { + *algo = OCI_DIGEST_SHA512; + return true; + } + return false; +} + +bool oci_digest_hex_valid(oci_digest_algo_t algo, const char *hex) +{ + if (!hex) + return false; + size_t want = oci_digest_hex_len(algo); + if (want == 0) + return false; + if (strlen(hex) != want) + return false; + for (size_t i = 0; i < want; i++) { + char c = hex[i]; + bool ok = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); + if (!ok) + return false; + } + return true; +} + +bool oci_digest_parse(const char *colon_form, + oci_digest_algo_t *out_algo, + char *out_hex) +{ + if (!colon_form || !out_algo || !out_hex) + return false; + + out_hex[0] = '\0'; + const char *colon = strchr(colon_form, ':'); + if (!colon || colon == colon_form) + return false; + + char name[8]; + size_t name_len = (size_t) (colon - colon_form); + if (name_len >= sizeof(name)) + return false; + memcpy(name, 
colon_form, name_len); + name[name_len] = '\0'; + + oci_digest_algo_t algo; + if (!oci_digest_algo_from_name(name, &algo)) + return false; + + const char *hex = colon + 1; + if (!oci_digest_hex_valid(algo, hex)) + return false; + + *out_algo = algo; + memcpy(out_hex, hex, oci_digest_hex_len(algo) + 1); + return true; +} + +oci_digester_t *oci_digester_new(oci_digest_algo_t algo) +{ + oci_digester_t *d = calloc(1, sizeof(*d)); + if (!d) + return NULL; + d->algo = algo; + switch (algo) { + case OCI_DIGEST_SHA256: + (void) CC_SHA256_Init(&d->ctx.sha256); + break; + case OCI_DIGEST_SHA512: + (void) CC_SHA512_Init(&d->ctx.sha512); + break; + default: + free(d); + return NULL; + } + return d; +} + +void oci_digester_free(oci_digester_t *d) +{ + free(d); +} + +void oci_digester_update(oci_digester_t *d, const void *buf, size_t len) +{ + if (!d || !buf || len == 0) + return; + const uint8_t *p = buf; + while (len > 0) { + size_t chunk = len > DIGESTER_CHUNK_MAX ? DIGESTER_CHUNK_MAX : len; + switch (d->algo) { + case OCI_DIGEST_SHA256: + (void) CC_SHA256_Update(&d->ctx.sha256, p, (CC_LONG) chunk); + break; + case OCI_DIGEST_SHA512: + (void) CC_SHA512_Update(&d->ctx.sha512, p, (CC_LONG) chunk); + break; + } + p += chunk; + len -= chunk; + } +} + +size_t oci_digester_finish_hex(oci_digester_t *d, char *out_hex) +{ + if (!d || !out_hex) + return 0; + uint8_t md[CC_SHA512_DIGEST_LENGTH]; + size_t bin_len = 0; + switch (d->algo) { + case OCI_DIGEST_SHA256: + (void) CC_SHA256_Final(md, &d->ctx.sha256); + bin_len = CC_SHA256_DIGEST_LENGTH; + break; + case OCI_DIGEST_SHA512: + (void) CC_SHA512_Final(md, &d->ctx.sha512); + bin_len = CC_SHA512_DIGEST_LENGTH; + break; + default: + return 0; + } + bin_to_hex_lower(md, bin_len, out_hex); + return bin_len * 2; +} + +size_t oci_digest_bytes(oci_digest_algo_t algo, + const void *buf, + size_t len, + char *out_hex) +{ + if (!out_hex) + return 0; + oci_digester_t *d = oci_digester_new(algo); + if (!d) + return 0; + oci_digester_update(d, 
buf, len); + size_t n = oci_digester_finish_hex(d, out_hex); + oci_digester_free(d); + return n; +} diff --git a/src/oci/digest.h b/src/oci/digest.h new file mode 100644 index 0000000..bf9bb0e --- /dev/null +++ b/src/oci/digest.h @@ -0,0 +1,92 @@ +/* Content digests for OCI image blobs + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Wraps macOS CommonCrypto SHA-256 and SHA-512 in a streaming API so the + * blob store and registry client can hash gigabyte-class layer downloads + * without ever buffering the full payload in memory. + * + * Hex output is always lowercase; the OCI image reference parser already + * rejects uppercase digest hex (see src/oci/ref.c), so every digest hex that + * flows between the parser, the manifest fetcher, and the local store must + * stay in the same canonical encoding to avoid silent dedup misses. + */ + +#pragma once + +#include +#include + +typedef enum { + OCI_DIGEST_SHA256, + OCI_DIGEST_SHA512, +} oci_digest_algo_t; + +/* Hex length per algorithm, excluding the trailing NUL. */ +#define OCI_DIGEST_SHA256_HEX_LEN 64 +#define OCI_DIGEST_SHA512_HEX_LEN 128 +#define OCI_DIGEST_HEX_MAX OCI_DIGEST_SHA512_HEX_LEN + +/* Opaque streaming digest. Allocated on the heap because the underlying + * CommonCrypto context is moderately sized (SHA-512 keeps an 80-word state) + * and callers tend to thread a digester pointer through several modules. + */ +typedef struct oci_digester oci_digester_t; + +/* Allocate a streaming digester for algo. Returns NULL on bad enum or oom. */ +oci_digester_t *oci_digester_new(oci_digest_algo_t algo); + +/* Release a digester. Safe on NULL. */ +void oci_digester_free(oci_digester_t *d); + +/* Append data. Splits large buffers into CC_LONG-sized chunks internally + * because CommonCrypto's update takes a uint32_t length and OCI layers can + * exceed 4 GiB. 
+ */ +void oci_digester_update(oci_digester_t *d, const void *buf, size_t len); + +/* Finalize and write the lowercase hex string to out_hex. out_hex must hold + * at least OCI_DIGEST_HEX_MAX + 1 bytes. Returns the hex length on success + * (without trailing NUL) or 0 if d is NULL. The digester is consumed by this + * call: the only valid next operation is oci_digester_free. + */ +size_t oci_digester_finish_hex(oci_digester_t *d, char *out_hex); + +/* Lookup the algorithm name string ("sha256" / "sha512"). Returns NULL when + * algo is out of range. The returned pointer is to static storage. + */ +const char *oci_digest_algo_name(oci_digest_algo_t algo); + +/* Expected hex length for an algorithm (without trailing NUL). Returns 0 on + * bad enum. + */ +size_t oci_digest_hex_len(oci_digest_algo_t algo); + +/* Parse an algorithm name. Returns true and writes algo on match; false on + * unknown name. + */ +bool oci_digest_algo_from_name(const char *name, oci_digest_algo_t *algo); + +/* Validate that hex is exactly oci_digest_hex_len(algo) characters and that + * every character is a lowercase hex digit. Rejects NULL. + */ +bool oci_digest_hex_valid(oci_digest_algo_t algo, const char *hex); + +/* Parse ":" into algo and a canonical lowercase hex copy. The + * input hex must already be lowercase; mixed case is rejected to match the + * reference parser. out_hex must hold OCI_DIGEST_HEX_MAX + 1 bytes. On + * success returns true; otherwise returns false and out_hex is left zeroed. + */ +bool oci_digest_parse(const char *colon_form, + oci_digest_algo_t *out_algo, + char *out_hex); + +/* One-shot helper: compute algo over buf/len and emit lowercase hex into + * out_hex (which must hold OCI_DIGEST_HEX_MAX + 1 bytes). Returns the hex + * length on success or 0 on bad enum / NULL output. 
+ */ +size_t oci_digest_bytes(oci_digest_algo_t algo, + const void *buf, + size_t len, + char *out_hex); diff --git a/tests/test-oci-blob-store.c b/tests/test-oci-blob-store.c new file mode 100644 index 0000000..75c59b9 --- /dev/null +++ b/tests/test-oci-blob-store.c @@ -0,0 +1,363 @@ +/* OCI content-addressable blob store unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Native macOS test program. Drives every documented store invariant from + * the open path (layout creation), through one-shot and streaming commits, + * digest mismatch rejection, dedup, abort, and store-survives-restart, all + * inside an mkdtemp scratch directory that is wiped on exit. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oci/blob-store.h" +#include "oci/digest.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int total = 0; +static int passed = 0; + +static void report_pass(const char *name) +{ + total++; + passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *detail) +{ + total++; + printf(" " RED "FAIL" RESET " %s: %s\n", name, detail ? detail : ""); +} + +/* Pre-computed SHA-256 of the byte string "abc". Same as the one verified by + * test-oci-digest, so the two suites cross-reference each other. + */ +static const char SHA256_ABC[] = + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"; + +static int remove_entry(const char *path, + const struct stat *st, + int typeflag, + struct FTW *ftwbuf) +{ + (void) st; + (void) typeflag; + (void) ftwbuf; + return remove(path); +} + +static void wipe_dir(const char *root) +{ + /* FTW_DEPTH guarantees children are processed before parents so rmdir + * does not race against still-populated directories. 
+ */ + (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS); +} + +static bool dir_is_empty(const char *path) +{ + DIR *dir = opendir(path); + if (!dir) + return false; + bool empty = true; + struct dirent *e; + while ((e = readdir(dir))) { + if (strcmp(e->d_name, ".") == 0 || strcmp(e->d_name, "..") == 0) + continue; + empty = false; + break; + } + closedir(dir); + return empty; +} + +static bool path_is_dir(const char *path) +{ + struct stat st; + return stat(path, &st) == 0 && S_ISDIR(st.st_mode); +} + +static bool path_is_file(const char *path) +{ + struct stat st; + return stat(path, &st) == 0 && S_ISREG(st.st_mode); +} + +static char *make_scratch_root(void) +{ + char *tmpl = strdup("/tmp/elfuse-oci-blob-XXXXXX"); + if (!tmpl) + return NULL; + if (!mkdtemp(tmpl)) { + free(tmpl); + return NULL; + } + return tmpl; +} + +int main(void) +{ + char *scratch = make_scratch_root(); + if (!scratch) { + fprintf(stderr, "mkdtemp failed: %s\n", strerror(errno)); + return 1; + } + + /* Layout creation: open on a fresh dir must produce blobs/sha256, + * blobs/sha512, and tmp under root. + */ + char store_root[512]; + snprintf(store_root, sizeof(store_root), "%s/store", scratch); + + printf("oci_blob_store layout\n"); + oci_blob_store_t *s = oci_blob_store_open(store_root); + if (!s) { + report_fail("open creates layout", + strerror(errno)); + goto cleanup; + } + { + char p[512]; + snprintf(p, sizeof(p), "%s/blobs/sha256", store_root); + bool ok_sha256 = path_is_dir(p); + snprintf(p, sizeof(p), "%s/blobs/sha512", store_root); + bool ok_sha512 = path_is_dir(p); + snprintf(p, sizeof(p), "%s/tmp", store_root); + bool ok_tmp = path_is_dir(p); + if (ok_sha256 && ok_sha512 && ok_tmp) + report_pass("open creates blobs/sha256, blobs/sha512, tmp"); + else + report_fail("open creates blobs/sha256, blobs/sha512, tmp", NULL); + } + + /* Reopening an already-populated root is idempotent. 
*/ + { + oci_blob_store_t *again = oci_blob_store_open(store_root); + if (again) { + report_pass("open is idempotent on existing layout"); + oci_blob_store_close(again); + } else { + report_fail("open is idempotent on existing layout", + strerror(errno)); + } + } + + /* Bad inputs. */ + { + errno = 0; + oci_blob_store_t *bad = oci_blob_store_open(NULL); + if (!bad && errno == EINVAL) + report_pass("open rejects NULL root"); + else + report_fail("open rejects NULL root", + bad ? "returned handle" : strerror(errno)); + oci_blob_store_close(bad); + } + { + errno = 0; + oci_blob_store_t *bad = oci_blob_store_open(""); + if (!bad && errno == EINVAL) + report_pass("open rejects empty root"); + else + report_fail("open rejects empty root", + bad ? "returned handle" : strerror(errno)); + oci_blob_store_close(bad); + } + + /* Path resolution: shape matches the OCI image-layout convention. */ + printf("oci_blob_store_path\n"); + { + char out[512]; + int n = oci_blob_store_path(s, OCI_DIGEST_SHA256, SHA256_ABC, out, + sizeof(out)); + char want[512]; + snprintf(want, sizeof(want), "%s/blobs/sha256/%s", store_root, + SHA256_ABC); + if (n > 0 && (size_t) n == strlen(want) && strcmp(out, want) == 0) + report_pass("path builds blobs//"); + else + report_fail("path builds blobs//", out); + } + { + char out[512]; + int n = oci_blob_store_path(s, OCI_DIGEST_SHA256, "not-hex", out, + sizeof(out)); + if (n == -1) + report_pass("path rejects malformed hex"); + else + report_fail("path rejects malformed hex", out); + } + + /* One-shot put followed by has() round trip. 
*/ + printf("oci_blob_store_put_bytes\n"); + { + if (oci_blob_store_put_bytes(s, OCI_DIGEST_SHA256, SHA256_ABC, "abc", + 3) != 0) { + report_fail("put_bytes commits a known-good blob", strerror(errno)); + } else { + char path[512]; + oci_blob_store_path(s, OCI_DIGEST_SHA256, SHA256_ABC, path, + sizeof(path)); + if (path_is_file(path) && + oci_blob_store_has(s, OCI_DIGEST_SHA256, SHA256_ABC)) + report_pass("put_bytes commits a known-good blob"); + else + report_fail("put_bytes commits a known-good blob", + "blob not visible after commit"); + } + char tmp_dir[512]; + snprintf(tmp_dir, sizeof(tmp_dir), "%s/tmp", store_root); + if (dir_is_empty(tmp_dir)) + report_pass("commit leaves tmp/ empty"); + else + report_fail("commit leaves tmp/ empty", NULL); + } + + /* Dedup: repeat the same commit and confirm exit success without + * touching the final inode. The fact that we observe the same path with + * the same content is enough; the writer's link(2) path takes the EEXIST + * branch internally. + */ + { + struct stat before, after; + char path[512]; + oci_blob_store_path(s, OCI_DIGEST_SHA256, SHA256_ABC, path, + sizeof(path)); + if (stat(path, &before) != 0) { + report_fail("dedup commit is idempotent", "no first blob"); + } else if (oci_blob_store_put_bytes(s, OCI_DIGEST_SHA256, SHA256_ABC, + "abc", 3) != 0) { + report_fail("dedup commit is idempotent", strerror(errno)); + } else if (stat(path, &after) != 0) { + report_fail("dedup commit is idempotent", "blob disappeared"); + } else if (before.st_ino != after.st_ino) { + report_fail("dedup commit is idempotent", + "inode changed (should stay the same)"); + } else { + report_pass("dedup commit is idempotent"); + } + } + + /* Digest mismatch: caller declares a hex that does not match the bytes. + * Commit must fail with EINVAL and leave no visible blob, no tmp leftover. 
+ */ + { + static const char WRONG[] = + "0000000000000000000000000000000000000000000000000000000000000000"; + errno = 0; + int rc = oci_blob_store_put_bytes(s, OCI_DIGEST_SHA256, WRONG, "abc", + 3); + char tmp_dir[512]; + snprintf(tmp_dir, sizeof(tmp_dir), "%s/tmp", store_root); + if (rc == -1 && errno == EINVAL && + !oci_blob_store_has(s, OCI_DIGEST_SHA256, WRONG) && + dir_is_empty(tmp_dir)) + report_pass("digest mismatch rejected, tmp/ stays empty"); + else + report_fail("digest mismatch rejected, tmp/ stays empty", + strerror(errno)); + } + + /* Streaming writer: write the same bytes in multiple chunks and confirm + * the commit hash still matches. + */ + printf("oci_blob_writer streaming\n"); + { + /* SHA-256("hello world") = b94d27b9... */ + const char *payload = "hello world"; + char expected[OCI_DIGEST_HEX_MAX + 1]; + oci_digest_bytes(OCI_DIGEST_SHA256, payload, strlen(payload), expected); + + oci_blob_writer_t *w = + oci_blob_writer_begin(s, OCI_DIGEST_SHA256, expected); + if (!w) { + report_fail("streaming writer commits chunked payload", + strerror(errno)); + } else if (!oci_blob_writer_write(w, "hello ", 6) || + !oci_blob_writer_write(w, "world", 5)) { + report_fail("streaming writer commits chunked payload", + strerror(errno)); + oci_blob_writer_abort(w); + } else if (oci_blob_writer_commit(w) != 0) { + report_fail("streaming writer commits chunked payload", + strerror(errno)); + } else if (!oci_blob_store_has(s, OCI_DIGEST_SHA256, expected)) { + report_fail("streaming writer commits chunked payload", + "not visible after commit"); + } else { + report_pass("streaming writer commits chunked payload"); + } + } + + /* Abort path: write some data, abort, confirm no committed blob and no + * tmp leftover. 
+ */ + { + static const char EXPECTED[] = + "deadbeef00000000000000000000000000000000000000000000000000000000"; + oci_blob_writer_t *w = + oci_blob_writer_begin(s, OCI_DIGEST_SHA256, EXPECTED); + if (!w) { + report_fail("abort leaves no leftover", strerror(errno)); + } else { + (void) oci_blob_writer_write(w, "partial", 7); + oci_blob_writer_abort(w); + char tmp_dir[512]; + snprintf(tmp_dir, sizeof(tmp_dir), "%s/tmp", store_root); + if (!oci_blob_store_has(s, OCI_DIGEST_SHA256, EXPECTED) && + dir_is_empty(tmp_dir)) + report_pass("abort leaves no leftover"); + else + report_fail("abort leaves no leftover", NULL); + } + } + + /* Restart: close the store handle, reopen the same root, confirm the + * committed blob is still visible. This is the "store survives restart" + * acceptance criterion from issue #31. + */ + printf("oci_blob_store restart\n"); + oci_blob_store_close(s); + s = oci_blob_store_open(store_root); + if (!s) { + report_fail("reopen sees previously-committed blob", strerror(errno)); + goto cleanup; + } + if (oci_blob_store_has(s, OCI_DIGEST_SHA256, SHA256_ABC)) + report_pass("reopen sees previously-committed blob"); + else + report_fail("reopen sees previously-committed blob", + "has() returned false"); + + /* has() must distinguish present vs absent. */ + { + static const char ABSENT[] = + "feedface00000000000000000000000000000000000000000000000000000000"; + if (!oci_blob_store_has(s, OCI_DIGEST_SHA256, ABSENT)) + report_pass("has() returns false for unknown digest"); + else + report_fail("has() returns false for unknown digest", NULL); + } + +cleanup: + oci_blob_store_close(s); + wipe_dir(scratch); + free(scratch); + + printf("\nResults: %d/%d passed\n", passed, total); + return passed == total ? 
0 : 1; +} diff --git a/tests/test-oci-digest.c b/tests/test-oci-digest.c new file mode 100644 index 0000000..a98b52e --- /dev/null +++ b/tests/test-oci-digest.c @@ -0,0 +1,296 @@ +/* OCI digest module unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Native macOS test program. Links directly against src/oci/digest.c (which + * uses CommonCrypto). Verifies the streaming and one-shot APIs against the + * NIST FIPS-180-4 published SHA-256 and SHA-512 vectors so any future + * regression in the chunking or hex encoder shows up immediately. + */ + +#include +#include +#include + +#include "oci/digest.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int total = 0; +static int passed = 0; + +static void report_pass(const char *name) +{ + total++; + passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *detail) +{ + total++; + printf(" " RED "FAIL" RESET " %s: %s\n", name, detail ? detail : ""); +} + +static void check_one_shot(const char *name, + oci_digest_algo_t algo, + const void *buf, + size_t len, + const char *want_hex) +{ + char got[OCI_DIGEST_HEX_MAX + 1]; + size_t n = oci_digest_bytes(algo, buf, len, got); + if (n == 0) { + report_fail(name, "oci_digest_bytes returned 0"); + return; + } + if (strcmp(got, want_hex) != 0) { + char detail[512]; + snprintf(detail, sizeof(detail), "got=%s want=%s", got, want_hex); + report_fail(name, detail); + return; + } + report_pass(name); +} + +static void check_streaming(const char *name, + oci_digest_algo_t algo, + const char *want_hex, + const void *buf, + size_t len, + size_t chunk) +{ + oci_digester_t *d = oci_digester_new(algo); + if (!d) { + report_fail(name, "digester_new returned NULL"); + return; + } + const unsigned char *p = buf; + while (len > 0) { + size_t step = len < chunk ? 
len : chunk; + oci_digester_update(d, p, step); + p += step; + len -= step; + } + char got[OCI_DIGEST_HEX_MAX + 1]; + size_t n = oci_digester_finish_hex(d, got); + oci_digester_free(d); + if (n == 0) { + report_fail(name, "finish_hex returned 0"); + return; + } + if (strcmp(got, want_hex) != 0) { + char detail[512]; + snprintf(detail, sizeof(detail), "got=%s want=%s", got, want_hex); + report_fail(name, detail); + return; + } + report_pass(name); +} + +/* SHA-256 of the empty string, "abc", the canonical 56-byte test vector, and + * the standard one-million-'a' marathon vector. Source: NIST FIPS 180-4 examples + * and the test vector pages collected by NIST CAVP. Kept inline so the test + * binary stays self-contained and offline. + */ +static const char SHA256_EMPTY[] = + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; +static const char SHA256_ABC[] = + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"; +static const char SHA256_56[] = + "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"; +static const char SHA256_MILLION_A[] = + "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"; + +static const char SHA512_EMPTY[] = + "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce" + "47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; +static const char SHA512_ABC[] = + "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a" + "2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"; + +static const char STR_56[] = + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; + +static const char VALID_SHA256_HEX[] = + "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; + +int main(void) +{ + printf("oci_digest one-shot vectors\n"); + check_one_shot("sha256(\"\")", OCI_DIGEST_SHA256, "", 0, SHA256_EMPTY); + check_one_shot("sha256(\"abc\")", OCI_DIGEST_SHA256, "abc", 3, SHA256_ABC); + check_one_shot("sha256(56-byte vector)", OCI_DIGEST_SHA256,
STR_56, + sizeof(STR_56) - 1, SHA256_56); + check_one_shot("sha512(\"\")", OCI_DIGEST_SHA512, "", 0, SHA512_EMPTY); + check_one_shot("sha512(\"abc\")", OCI_DIGEST_SHA512, "abc", 3, SHA512_ABC); + + /* The one-million-'a' vector verifies that streaming the input through + * repeated oci_digester_update calls produces the same hash as a one-shot call. Build + * the buffer dynamically so the test source does not balloon. + */ + printf("oci_digest streaming\n"); + const size_t million = 1000000; + char *blob = malloc(million); + if (!blob) { + fprintf(stderr, "alloc million bytes failed\n"); + return 1; + } + memset(blob, 'a', million); + check_one_shot("sha256(1M 'a' one-shot)", OCI_DIGEST_SHA256, blob, million, + SHA256_MILLION_A); + check_streaming("sha256(1M 'a' streamed in 4 KiB chunks)", + OCI_DIGEST_SHA256, SHA256_MILLION_A, blob, million, 4096); + check_streaming("sha256(1M 'a' streamed in 17-byte chunks)", + OCI_DIGEST_SHA256, SHA256_MILLION_A, blob, million, 17); + free(blob); + + /* Boundary calls: NULL / zero-length updates must not crash and must not + * corrupt the running state.
+ */ + { + oci_digester_t *d = oci_digester_new(OCI_DIGEST_SHA256); + oci_digester_update(d, NULL, 0); + oci_digester_update(d, "", 0); + oci_digester_update(d, NULL, 7); /* len ignored when buf NULL */ + char got[OCI_DIGEST_HEX_MAX + 1]; + oci_digester_update(d, "abc", 3); + oci_digester_finish_hex(d, got); + oci_digester_free(d); + if (strcmp(got, SHA256_ABC) == 0) + report_pass("update tolerates NULL / zero-length"); + else + report_fail("update tolerates NULL / zero-length", got); + } + + printf("oci_digest_hex_valid\n"); + if (oci_digest_hex_valid(OCI_DIGEST_SHA256, VALID_SHA256_HEX)) + report_pass("accepts canonical sha256 hex"); + else + report_fail("accepts canonical sha256 hex", NULL); + + if (!oci_digest_hex_valid(OCI_DIGEST_SHA256, NULL)) + report_pass("rejects NULL hex"); + else + report_fail("rejects NULL hex", NULL); + + if (!oci_digest_hex_valid( + OCI_DIGEST_SHA256, + "0123456789ABCDEF0123456789abcdef0123456789abcdef0123456789abcdef")) + report_pass("rejects uppercase hex"); + else + report_fail("rejects uppercase hex", NULL); + + if (!oci_digest_hex_valid(OCI_DIGEST_SHA256, "deadbeef")) + report_pass("rejects short hex"); + else + report_fail("rejects short hex", NULL); + + if (!oci_digest_hex_valid( + OCI_DIGEST_SHA256, + "g123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef")) + report_pass("rejects non-hex char"); + else + report_fail("rejects non-hex char", NULL); + + if (!oci_digest_hex_valid(OCI_DIGEST_SHA512, VALID_SHA256_HEX)) + report_pass("rejects sha256-length hex against sha512"); + else + report_fail("rejects sha256-length hex against sha512", NULL); + + printf("oci_digest_parse\n"); + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + char input[256]; + snprintf(input, sizeof(input), "sha256:%s", VALID_SHA256_HEX); + if (oci_digest_parse(input, &algo, hex) && algo == OCI_DIGEST_SHA256 && + strcmp(hex, VALID_SHA256_HEX) == 0) + report_pass("parse sha256 form"); + else + report_fail("parse sha256 form", hex); 
+ } + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse("md5:deadbeef", &algo, hex)) + report_pass("parse rejects unknown algo"); + else + report_fail("parse rejects unknown algo", NULL); + } + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse("sha256-no-colon", &algo, hex)) + report_pass("parse rejects missing colon"); + else + report_fail("parse rejects missing colon", NULL); + } + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse("sha256:short", &algo, hex)) + report_pass("parse rejects short hex"); + else + report_fail("parse rejects short hex", NULL); + } + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + char buf[256]; + snprintf(buf, sizeof(buf), "sha256:%s", + "0123456789ABCDEF0123456789abcdef0123456789abcdef0123456789a" + "bcdef"); + if (!oci_digest_parse(buf, &algo, hex)) + report_pass("parse rejects uppercase hex"); + else + report_fail("parse rejects uppercase hex", NULL); + } + { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse(NULL, &algo, hex)) + report_pass("parse rejects NULL input"); + else + report_fail("parse rejects NULL input", NULL); + } + + /* Algo name lookups stay in sync with the enum values. 
*/ + if (oci_digest_algo_name(OCI_DIGEST_SHA256) && + strcmp(oci_digest_algo_name(OCI_DIGEST_SHA256), "sha256") == 0) + report_pass("algo_name maps sha256"); + else + report_fail("algo_name maps sha256", NULL); + + if (oci_digest_algo_name(OCI_DIGEST_SHA512) && + strcmp(oci_digest_algo_name(OCI_DIGEST_SHA512), "sha512") == 0) + report_pass("algo_name maps sha512"); + else + report_fail("algo_name maps sha512", NULL); + + if (oci_digest_hex_len(OCI_DIGEST_SHA256) == OCI_DIGEST_SHA256_HEX_LEN && + oci_digest_hex_len(OCI_DIGEST_SHA512) == OCI_DIGEST_SHA512_HEX_LEN) + report_pass("hex_len matches public constants"); + else + report_fail("hex_len matches public constants", NULL); + + { + oci_digest_algo_t algo; + if (oci_digest_algo_from_name("sha256", &algo) && + algo == OCI_DIGEST_SHA256 && + oci_digest_algo_from_name("sha512", &algo) && + algo == OCI_DIGEST_SHA512 && + !oci_digest_algo_from_name("sha1", &algo) && + !oci_digest_algo_from_name(NULL, &algo)) + report_pass("algo_from_name accepts known and rejects unknown"); + else + report_fail("algo_from_name accepts known and rejects unknown", + NULL); + } + + printf("\nResults: %d/%d passed\n", passed, total); + return passed == total ? 0 : 1; +} From 9bf71416ef7d3e60d83ffc51cfe8b4de709ed996 Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 15:10:05 +0800 Subject: [PATCH 3/7] Add OCI manifest, image-index, and image-config parsers Third slice of Phase 1 from issue #31. Lands the JSON deserialization substrate the upcoming registry client will run every fetched manifest, index, and config blob through. No HTTP, no unpack, no CLI surface yet; this slice is intentionally a pure offline library plus a 76-case unit test driven by inline JSON fixtures so the parse contract is auditable without standing up a network. externals/cjson/ vendors cJSON v1.7.18 verbatim (MIT-licensed, single .c/.h pair) per oci-roadmap.md Q9. 
No local modifications; future security updates re-fetch via the three curl commands in externals/cjson/VENDORING.md. .gitignore switches from ignoring all of externals/ to ignoring externals/* with an explicit !externals/cjson/ exception so the vendored tree stays tracked while the downloaded test fixtures stay out of git. The Makefile compiles cJSON with the same project CFLAGS the rest of the codebase uses; cJSON happens to be clean under -Wall -Wextra -Wpedantic on this version, so no per-file warning override is required. src/oci/media-type.{c,h} is the canonical enum + table for every OCI and Docker media type the manifest/index/config/layer code branches on. Foreign (nondistributable) layers are recognized and distinguishable so the parser can name the actual offending layer type instead of collapsing them to a generic "unknown", but the supported-layer predicate excludes them per oci-roadmap.md Q3 (elfuse cannot fetch the out-of-band payload they reference). The parser strips charset/boundary parameters and surrounding whitespace before lookup so the registry's Content-Type header value canonicalizes the same way the manifest's mediaType JSON field does. src/oci/manifest.{c,h} parses image manifests, image indexes, and image configs against schemaVersion 2. Every descriptor digest is validated through oci_digest_parse so a parsed oci_descriptor_t carries both the original ":" string and a populated (algo, hex[]) pair the blob store from slice 2 can consume directly. Size fields go through a fractional-part / negative / round-trip-precision check because cJSON returns numbers in a double; the parser rejects sizes beyond 2**53 - 1 where IEEE 754 precision starts dropping integers and rejects fractional sizes that would otherwise truncate silently to a near-but-wrong integer. Manifest config descriptors are required to carry a config media type, layer descriptors must carry a layer media type, and foreign layers are rejected with a precise error. 
Image configs require rootfs.type == "layers" (the only value the OCI image-spec defines) and validate every rootfs.diff_ids entry as a lowercase digest. Platform fields default empty variant / os.version strings to "" rather than NULL so the selector can use unconditional strcmp. oci_index_pick_linux_arm64 prefers variant "v8", then empty variant, then any other arm64 variant. It also skips entries whose manifest media type is not recognized -- even when the platform matches, the registry-fetch path cannot consume the resulting manifest, so picking such an entry would only defer a failure. tests/test-oci-manifest.c exercises 76 cases inline: every recognized media type lookup, charset/whitespace stripping, NULL and bogus strings, every predicate, both compression results; OCI and Docker happy-path manifest parses with two-layer gzip + zstd mix; the eight manifest rejection paths (malformed JSON, schemaVersion != 2, missing config, uppercase digest, negative size, fractional size, foreign layer, non-config media type on the config descriptor); the five index paths (multi-arch v8 wins; no-v8 picks empty variant over v7; no linux/arm64 returns NULL; Docker manifest list; unknown manifest mediaType is recorded but the selector skips it); and the four image config paths (happy with User/Env/Entrypoint/Cmd/WorkingDir/diff_ids; missing rootfs; non-layers rootfs.type; malformed diff_id). Makefile / mk/config.mk / mk/tests.mk wire the new translation units into elfuse's link line, add oci/media-type.o + oci/manifest.o + the vendored cJSON object, register tests/test-oci-manifest.c in NATIVE_TESTS so the cross-compile pattern rule does not pick it up, and run the new test as the final stage of make check beside the existing test-oci-ref / test-oci-digest / test-oci-blob-store stages.
All 76 new assertions pass; the rest of make check stays green (unit suite 81 passed / 0 failed / 3 skipped, busybox, proctitle, procfs-exec, timeout-disable, OCI-ref 34/34, OCI-digest 25/25, OCI-blob-store 14/14). elfuse oci pull / prune / list still return rc=2; wiring the parser into the CLI is gated on slice 4 (HTTPS + token challenge + blob fetch). The parsers exist now so that work can land without also adding deserialization. --- .gitignore | 7 +- Makefile | 21 +- externals/cjson/LICENSE | 20 + externals/cjson/VENDORING.md | 35 + externals/cjson/cJSON.c | 3143 ++++++++++++++++++++++++++++++++++ externals/cjson/cJSON.h | 300 ++++ mk/config.mk | 3 +- mk/tests.mk | 8 +- src/oci/manifest.c | 707 ++++++++ src/oci/manifest.h | 160 ++ src/oci/media-type.c | 189 ++ src/oci/media-type.h | 93 + tests/test-oci-manifest.c | 748 ++++++++ 13 files changed, 5430 insertions(+), 4 deletions(-) create mode 100644 externals/cjson/LICENSE create mode 100644 externals/cjson/VENDORING.md create mode 100644 externals/cjson/cJSON.c create mode 100644 externals/cjson/cJSON.h create mode 100644 src/oci/manifest.c create mode 100644 src/oci/manifest.h create mode 100644 src/oci/media-type.c create mode 100644 src/oci/media-type.h create mode 100644 tests/test-oci-manifest.c diff --git a/.gitignore b/.gitignore index 7426f7e..0ee7591 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ build/ archive/ -externals/ +# externals/ holds downloaded fixtures (kernel, rootfs, packages) that are +# fetched on demand; tracking them in git would balloon the repo. The +# vendored cJSON tree is an exception: it ships with the source so the +# OCI parser builds out of the box. 
+externals/* +!externals/cjson/ lib/modules/ *.o *.bin diff --git a/Makefile b/Makefile index 066ab36..089570b 100644 --- a/Makefile +++ b/Makefile @@ -67,11 +67,25 @@ SRCS := \ oci/ref.c \ oci/cli.c \ oci/digest.c \ - oci/blob-store.c + oci/blob-store.c \ + oci/media-type.c \ + oci/manifest.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) +# Vendored cJSON: third-party MIT JSON parser pinned at v1.7.18. Only OCI +# translation units include it. Compiles cleanly with the project warning +# posture, so no per-file CFLAGS override is required. +CJSON_DIR := externals/cjson +CJSON_OBJ := $(BUILD_DIR)/externals/cjson/cJSON.o +OBJS += $(CJSON_OBJ) + +$(CJSON_OBJ): $(CJSON_DIR)/cJSON.c $(CJSON_DIR)/cJSON.h | $(BUILD_DIR) + @mkdir -p $(dir $@) + @echo " CC $<" + $(Q)$(CC) $(CFLAGS) -c -o $@ $< + DISPATCH_MANIFEST := src/syscall/dispatch.tbl DISPATCH_GENERATOR := scripts/gen-syscall-dispatch.py DISPATCH_HEADER := $(BUILD_DIR)/dispatch.h @@ -148,6 +162,11 @@ $(BUILD_DIR)/test-oci-blob-store: $(BUILD_DIR)/test-oci-blob-store.o $(BUILD_DIR @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ +## Build the OCI manifest / index / config parser unit test (native, no HVF). +$(BUILD_DIR)/test-oci-manifest: $(BUILD_DIR)/test-oci-manifest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/digest.o $(CJSON_OBJ) | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. 
diff --git a/externals/cjson/LICENSE b/externals/cjson/LICENSE new file mode 100644 index 0000000..78deb04 --- /dev/null +++ b/externals/cjson/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/externals/cjson/VENDORING.md b/externals/cjson/VENDORING.md new file mode 100644 index 0000000..ad70115 --- /dev/null +++ b/externals/cjson/VENDORING.md @@ -0,0 +1,35 @@ +# Vendored cJSON + +This directory contains a vendored copy of [cJSON](https://github.com/DaveGamble/cJSON), +the ultralightweight JSON parser written in ANSI C. cJSON ships as a single +`.c` / `.h` pair and is licensed under the MIT license (see `LICENSE`). + +## Why vendored + +`oci-roadmap.md` Q9 commits Phase 1 to hand-rolled C alongside the existing +elfuse codebase: no Go, no Rust, no `cargo` / `go` in the build matrix.
cJSON +is the smallest credible JSON dependency that fits that contract; it is +self-contained, has no external dependencies, and compiles cleanly with +`clang` and `gcc` on macOS and Linux. + +## Version + +Pinned to upstream tag `v1.7.18` (2024-07-30). Fetched with: + +``` +curl -fsSL -o cJSON.h https://raw.githubusercontent.com/DaveGamble/cJSON/v1.7.18/cJSON.h +curl -fsSL -o cJSON.c https://raw.githubusercontent.com/DaveGamble/cJSON/v1.7.18/cJSON.c +curl -fsSL -o LICENSE https://raw.githubusercontent.com/DaveGamble/cJSON/v1.7.18/LICENSE +``` + +## Local modifications + +None. The files are byte-identical to the upstream tag so future security +updates can be applied by re-running the curl commands above. + +## Build integration + +The Makefile compiles `cJSON.c` with the same project `CFLAGS` as the rest of +the codebase; v1.7.18 builds cleanly under that warning posture, so no +per-file override is required. Only `src/oci/` translation units +include `externals/cjson/cJSON.h`; the rest of the codebase never sees it. diff --git a/externals/cjson/cJSON.c b/externals/cjson/cJSON.c new file mode 100644 index 0000000..61483d9 --- /dev/null +++ b/externals/cjson/cJSON.c @@ -0,0 +1,3143 @@ +/* + Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* cJSON */ +/* JSON parser in C. */ + +/* disable warnings about old C89 functions in MSVC */ +#if !defined(_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) +#define _CRT_SECURE_NO_DEPRECATE +#endif + +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#if defined(_MSC_VER) +#pragma warning (push) +/* disable warning about single line comments in system headers */ +#pragma warning (disable : 4001) +#endif + +#include <string.h> +#include <stdio.h> +#include <math.h> +#include <stdlib.h> +#include <limits.h> +#include <ctype.h> +#include <float.h> + +#ifdef ENABLE_LOCALES +#include <locale.h> +#endif + +#if defined(_MSC_VER) +#pragma warning (pop) +#endif +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + +#include "cJSON.h" + +/* define our own boolean type */ +#ifdef true +#undef true +#endif +#define true ((cJSON_bool)1) + +#ifdef false +#undef false +#endif +#define false ((cJSON_bool)0) + +/* define isnan and isinf for ANSI C, if in C99 or above, isnan and isinf has been defined in math.h */ +#ifndef isinf +#define isinf(d) (isnan((d - d)) && !isnan(d)) +#endif +#ifndef isnan +#define isnan(d) (d != d) +#endif + +#ifndef NAN +#ifdef _WIN32 +#define NAN sqrt(-1.0) +#else +#define NAN 0.0/0.0 +#endif +#endif + +typedef struct { + const unsigned char *json; + size_t position; +} error; +static error global_error = { NULL, 0 }; + +CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void) +{ + return (const char*) (global_error.json + global_error.position); +} + +CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item) +{ + if (!cJSON_IsString(item)) +
{ + return NULL; + } + + return item->valuestring; +} + +CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item) +{ + if (!cJSON_IsNumber(item)) + { + return (double) NAN; + } + + return item->valuedouble; +} + +/* This is a safeguard to prevent copy-pasters from using incompatible C and header files */ +#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || (CJSON_VERSION_PATCH != 18) + #error cJSON.h and cJSON.c have different versions. Make sure that both have the same. +#endif + +CJSON_PUBLIC(const char*) cJSON_Version(void) +{ + static char version[15]; + sprintf(version, "%i.%i.%i", CJSON_VERSION_MAJOR, CJSON_VERSION_MINOR, CJSON_VERSION_PATCH); + + return version; +} + +/* Case insensitive string comparison, doesn't consider two NULL pointers equal though */ +static int case_insensitive_strcmp(const unsigned char *string1, const unsigned char *string2) +{ + if ((string1 == NULL) || (string2 == NULL)) + { + return 1; + } + + if (string1 == string2) + { + return 0; + } + + for(; tolower(*string1) == tolower(*string2); (void)string1++, string2++) + { + if (*string1 == '\0') + { + return 0; + } + } + + return tolower(*string1) - tolower(*string2); +} + +typedef struct internal_hooks +{ + void *(CJSON_CDECL *allocate)(size_t size); + void (CJSON_CDECL *deallocate)(void *pointer); + void *(CJSON_CDECL *reallocate)(void *pointer, size_t size); +} internal_hooks; + +#if defined(_MSC_VER) +/* work around MSVC error C2322: '...' address of dllimport '...' 
is not static */ +static void * CJSON_CDECL internal_malloc(size_t size) +{ + return malloc(size); +} +static void CJSON_CDECL internal_free(void *pointer) +{ + free(pointer); +} +static void * CJSON_CDECL internal_realloc(void *pointer, size_t size) +{ + return realloc(pointer, size); +} +#else +#define internal_malloc malloc +#define internal_free free +#define internal_realloc realloc +#endif + +/* strlen of character literals resolved at compile time */ +#define static_strlen(string_literal) (sizeof(string_literal) - sizeof("")) + +static internal_hooks global_hooks = { internal_malloc, internal_free, internal_realloc }; + +static unsigned char* cJSON_strdup(const unsigned char* string, const internal_hooks * const hooks) +{ + size_t length = 0; + unsigned char *copy = NULL; + + if (string == NULL) + { + return NULL; + } + + length = strlen((const char*)string) + sizeof(""); + copy = (unsigned char*)hooks->allocate(length); + if (copy == NULL) + { + return NULL; + } + memcpy(copy, string, length); + + return copy; +} + +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks) +{ + if (hooks == NULL) + { + /* Reset hooks */ + global_hooks.allocate = malloc; + global_hooks.deallocate = free; + global_hooks.reallocate = realloc; + return; + } + + global_hooks.allocate = malloc; + if (hooks->malloc_fn != NULL) + { + global_hooks.allocate = hooks->malloc_fn; + } + + global_hooks.deallocate = free; + if (hooks->free_fn != NULL) + { + global_hooks.deallocate = hooks->free_fn; + } + + /* use realloc only if both free and malloc are used */ + global_hooks.reallocate = NULL; + if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free)) + { + global_hooks.reallocate = realloc; + } +} + +/* Internal constructor. */ +static cJSON *cJSON_New_Item(const internal_hooks * const hooks) +{ + cJSON* node = (cJSON*)hooks->allocate(sizeof(cJSON)); + if (node) + { + memset(node, '\0', sizeof(cJSON)); + } + + return node; +} + +/* Delete a cJSON structure. 
*/ +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item) +{ + cJSON *next = NULL; + while (item != NULL) + { + next = item->next; + if (!(item->type & cJSON_IsReference) && (item->child != NULL)) + { + cJSON_Delete(item->child); + } + if (!(item->type & cJSON_IsReference) && (item->valuestring != NULL)) + { + global_hooks.deallocate(item->valuestring); + item->valuestring = NULL; + } + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + global_hooks.deallocate(item->string); + item->string = NULL; + } + global_hooks.deallocate(item); + item = next; + } +} + +/* get the decimal point character of the current locale */ +static unsigned char get_decimal_point(void) +{ +#ifdef ENABLE_LOCALES + struct lconv *lconv = localeconv(); + return (unsigned char) lconv->decimal_point[0]; +#else + return '.'; +#endif +} + +typedef struct +{ + const unsigned char *content; + size_t length; + size_t offset; + size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. */ + internal_hooks hooks; +} parse_buffer; + +/* check if the given size is left to read in a given parse buffer (starting with 1) */ +#define can_read(buffer, size) ((buffer != NULL) && (((buffer)->offset + size) <= (buffer)->length)) +/* check if the buffer can be accessed at the given index (starting with 0) */ +#define can_access_at_index(buffer, index) ((buffer != NULL) && (((buffer)->offset + index) < (buffer)->length)) +#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index)) +/* get a pointer to the buffer at the position */ +#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset) + +/* Parse the input text to generate a number, and populate the result into item. 
*/ +static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer) +{ + double number = 0; + unsigned char *after_end = NULL; + unsigned char number_c_string[64]; + unsigned char decimal_point = get_decimal_point(); + size_t i = 0; + + if ((input_buffer == NULL) || (input_buffer->content == NULL)) + { + return false; + } + + /* copy the number into a temporary buffer and replace '.' with the decimal point + * of the current locale (for strtod) + * This also takes care of '\0' not necessarily being available for marking the end of the input */ + for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) + { + switch (buffer_at_offset(input_buffer)[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + case 'e': + case 'E': + number_c_string[i] = buffer_at_offset(input_buffer)[i]; + break; + + case '.': + number_c_string[i] = decimal_point; + break; + + default: + goto loop_end; + } + } +loop_end: + number_c_string[i] = '\0'; + + number = strtod((const char*)number_c_string, (char**)&after_end); + if (number_c_string == after_end) + { + return false; /* parse_error */ + } + + item->valuedouble = number; + + /* use saturation in case of overflow */ + if (number >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (number <= (double)INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)number; + } + + item->type = cJSON_Number; + + input_buffer->offset += (size_t)(after_end - number_c_string); + return true; +} + +/* don't ask me, but the original cJSON_SetNumberValue returns an integer or double */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number) +{ + if (number >= INT_MAX) + { + object->valueint = INT_MAX; + } + else if (number <= (double)INT_MIN) + { + object->valueint = INT_MIN; + } + else + { + object->valueint = (int)number; + } + + return 
object->valuedouble = number; +} + +/* Note: when passing a NULL valuestring, cJSON_SetValuestring treats this as an error and return NULL */ +CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring) +{ + char *copy = NULL; + /* if object's type is not cJSON_String or is cJSON_IsReference, it should not set valuestring */ + if ((object == NULL) || !(object->type & cJSON_String) || (object->type & cJSON_IsReference)) + { + return NULL; + } + /* return NULL if the object is corrupted or valuestring is NULL */ + if (object->valuestring == NULL || valuestring == NULL) + { + return NULL; + } + if (strlen(valuestring) <= strlen(object->valuestring)) + { + strcpy(object->valuestring, valuestring); + return object->valuestring; + } + copy = (char*) cJSON_strdup((const unsigned char*)valuestring, &global_hooks); + if (copy == NULL) + { + return NULL; + } + if (object->valuestring != NULL) + { + cJSON_free(object->valuestring); + } + object->valuestring = copy; + + return copy; +} + +typedef struct +{ + unsigned char *buffer; + size_t length; + size_t offset; + size_t depth; /* current nesting depth (for formatted printing) */ + cJSON_bool noalloc; + cJSON_bool format; /* is this print a formatted print */ + internal_hooks hooks; +} printbuffer; + +/* realloc printbuffer if necessary to have at least "needed" bytes more */ +static unsigned char* ensure(printbuffer * const p, size_t needed) +{ + unsigned char *newbuffer = NULL; + size_t newsize = 0; + + if ((p == NULL) || (p->buffer == NULL)) + { + return NULL; + } + + if ((p->length > 0) && (p->offset >= p->length)) + { + /* make sure that offset is valid */ + return NULL; + } + + if (needed > INT_MAX) + { + /* sizes bigger than INT_MAX are currently not supported */ + return NULL; + } + + needed += p->offset + 1; + if (needed <= p->length) + { + return p->buffer + p->offset; + } + + if (p->noalloc) { + return NULL; + } + + /* calculate new buffer size */ + if (needed > (INT_MAX / 2)) + { + /* 
overflow of int, use INT_MAX if possible */ + if (needed <= INT_MAX) + { + newsize = INT_MAX; + } + else + { + return NULL; + } + } + else + { + newsize = needed * 2; + } + + if (p->hooks.reallocate != NULL) + { + /* reallocate with realloc if available */ + newbuffer = (unsigned char*)p->hooks.reallocate(p->buffer, newsize); + if (newbuffer == NULL) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + } + else + { + /* otherwise reallocate manually */ + newbuffer = (unsigned char*)p->hooks.allocate(newsize); + if (!newbuffer) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + + memcpy(newbuffer, p->buffer, p->offset + 1); + p->hooks.deallocate(p->buffer); + } + p->length = newsize; + p->buffer = newbuffer; + + return newbuffer + p->offset; +} + +/* calculate the new length of the string in a printbuffer and update the offset */ +static void update_offset(printbuffer * const buffer) +{ + const unsigned char *buffer_pointer = NULL; + if ((buffer == NULL) || (buffer->buffer == NULL)) + { + return; + } + buffer_pointer = buffer->buffer + buffer->offset; + + buffer->offset += strlen((const char*)buffer_pointer); +} + +/* securely comparison of floating-point variables */ +static cJSON_bool compare_double(double a, double b) +{ + double maxVal = fabs(a) > fabs(b) ? fabs(a) : fabs(b); + return (fabs(a - b) <= maxVal * DBL_EPSILON); +} + +/* Render the number nicely from the given item into a string. 
*/ +static cJSON_bool print_number(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output_pointer = NULL; + double d = item->valuedouble; + int length = 0; + size_t i = 0; + unsigned char number_buffer[26] = {0}; /* temporary buffer to print the number into */ + unsigned char decimal_point = get_decimal_point(); + double test = 0.0; + + if (output_buffer == NULL) + { + return false; + } + + /* This checks for NaN and Infinity */ + if (isnan(d) || isinf(d)) + { + length = sprintf((char*)number_buffer, "null"); + } + else if(d == (double)item->valueint) + { + length = sprintf((char*)number_buffer, "%d", item->valueint); + } + else + { + /* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */ + length = sprintf((char*)number_buffer, "%1.15g", d); + + /* Check whether the original double can be recovered */ + if ((sscanf((char*)number_buffer, "%lg", &test) != 1) || !compare_double((double)test, d)) + { + /* If not, print with 17 decimal places of precision */ + length = sprintf((char*)number_buffer, "%1.17g", d); + } + } + + /* sprintf failed or buffer overrun occurred */ + if ((length < 0) || (length > (int)(sizeof(number_buffer) - 1))) + { + return false; + } + + /* reserve appropriate space in the output */ + output_pointer = ensure(output_buffer, (size_t)length + sizeof("")); + if (output_pointer == NULL) + { + return false; + } + + /* copy the printed number to the output and replace locale + * dependent decimal point with '.' 
*/ + for (i = 0; i < ((size_t)length); i++) + { + if (number_buffer[i] == decimal_point) + { + output_pointer[i] = '.'; + continue; + } + + output_pointer[i] = number_buffer[i]; + } + output_pointer[i] = '\0'; + + output_buffer->offset += (size_t)length; + + return true; +} + +/* parse 4 digit hexadecimal number */ +static unsigned parse_hex4(const unsigned char * const input) +{ + unsigned int h = 0; + size_t i = 0; + + for (i = 0; i < 4; i++) + { + /* parse digit */ + if ((input[i] >= '0') && (input[i] <= '9')) + { + h += (unsigned int) input[i] - '0'; + } + else if ((input[i] >= 'A') && (input[i] <= 'F')) + { + h += (unsigned int) 10 + input[i] - 'A'; + } + else if ((input[i] >= 'a') && (input[i] <= 'f')) + { + h += (unsigned int) 10 + input[i] - 'a'; + } + else /* invalid */ + { + return 0; + } + + if (i < 3) + { + /* shift left to make place for the next nibble */ + h = h << 4; + } + } + + return h; +} + +/* converts a UTF-16 literal to UTF-8 + * A literal can be one or two sequences of the form \uXXXX */ +static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer) +{ + long unsigned int codepoint = 0; + unsigned int first_code = 0; + const unsigned char *first_sequence = input_pointer; + unsigned char utf8_length = 0; + unsigned char utf8_position = 0; + unsigned char sequence_length = 0; + unsigned char first_byte_mark = 0; + + if ((input_end - first_sequence) < 6) + { + /* input ends unexpectedly */ + goto fail; + } + + /* get the first utf16 sequence */ + first_code = parse_hex4(first_sequence + 2); + + /* check that the code is valid */ + if (((first_code >= 0xDC00) && (first_code <= 0xDFFF))) + { + goto fail; + } + + /* UTF16 surrogate pair */ + if ((first_code >= 0xD800) && (first_code <= 0xDBFF)) + { + const unsigned char *second_sequence = first_sequence + 6; + unsigned int second_code = 0; + sequence_length = 12; /* \uXXXX\uXXXX */ + + if ((input_end - 
second_sequence) < 6) + { + /* input ends unexpectedly */ + goto fail; + } + + if ((second_sequence[0] != '\\') || (second_sequence[1] != 'u')) + { + /* missing second half of the surrogate pair */ + goto fail; + } + + /* get the second utf16 sequence */ + second_code = parse_hex4(second_sequence + 2); + /* check that the code is valid */ + if ((second_code < 0xDC00) || (second_code > 0xDFFF)) + { + /* invalid second half of the surrogate pair */ + goto fail; + } + + + /* calculate the unicode codepoint from the surrogate pair */ + codepoint = 0x10000 + (((first_code & 0x3FF) << 10) | (second_code & 0x3FF)); + } + else + { + sequence_length = 6; /* \uXXXX */ + codepoint = first_code; + } + + /* encode as UTF-8 + * takes at maximum 4 bytes to encode: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + if (codepoint < 0x80) + { + /* normal ascii, encoding 0xxxxxxx */ + utf8_length = 1; + } + else if (codepoint < 0x800) + { + /* two bytes, encoding 110xxxxx 10xxxxxx */ + utf8_length = 2; + first_byte_mark = 0xC0; /* 11000000 */ + } + else if (codepoint < 0x10000) + { + /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */ + utf8_length = 3; + first_byte_mark = 0xE0; /* 11100000 */ + } + else if (codepoint <= 0x10FFFF) + { + /* four bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_length = 4; + first_byte_mark = 0xF0; /* 11110000 */ + } + else + { + /* invalid unicode codepoint */ + goto fail; + } + + /* encode as utf8 */ + for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--) + { + /* 10xxxxxx */ + (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF); + codepoint >>= 6; + } + /* encode first byte */ + if (utf8_length > 1) + { + (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF); + } + else + { + (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F); + } + + *output_pointer += utf8_length; + + return sequence_length; + +fail: + return 0; +} + +/* Parse the input text into 
an unescaped cinput, and populate item. */ +static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer) +{ + const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1; + const unsigned char *input_end = buffer_at_offset(input_buffer) + 1; + unsigned char *output_pointer = NULL; + unsigned char *output = NULL; + + /* not a string */ + if (buffer_at_offset(input_buffer)[0] != '\"') + { + goto fail; + } + + { + /* calculate approximate size of the output (overestimate) */ + size_t allocation_length = 0; + size_t skipped_bytes = 0; + while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"')) + { + /* is escape sequence */ + if (input_end[0] == '\\') + { + if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length) + { + /* prevent buffer overflow when last input character is a backslash */ + goto fail; + } + skipped_bytes++; + input_end++; + } + input_end++; + } + if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"')) + { + goto fail; /* string ended unexpectedly */ + } + + /* This is at most how much we need for the output */ + allocation_length = (size_t) (input_end - buffer_at_offset(input_buffer)) - skipped_bytes; + output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof("")); + if (output == NULL) + { + goto fail; /* allocation failure */ + } + } + + output_pointer = output; + /* loop through the string literal */ + while (input_pointer < input_end) + { + if (*input_pointer != '\\') + { + *output_pointer++ = *input_pointer++; + } + /* escape sequence */ + else + { + unsigned char sequence_length = 2; + if ((input_end - input_pointer) < 1) + { + goto fail; + } + + switch (input_pointer[1]) + { + case 'b': + *output_pointer++ = '\b'; + break; + case 'f': + *output_pointer++ = '\f'; + break; + case 'n': + *output_pointer++ = '\n'; + break; + case 'r': + *output_pointer++ = '\r'; + break; + case 't': + 
*output_pointer++ = '\t'; + break; + case '\"': + case '\\': + case '/': + *output_pointer++ = input_pointer[1]; + break; + + /* UTF-16 literal */ + case 'u': + sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); + if (sequence_length == 0) + { + /* failed to convert UTF16-literal to UTF-8 */ + goto fail; + } + break; + + default: + goto fail; + } + input_pointer += sequence_length; + } + } + + /* zero terminate the output */ + *output_pointer = '\0'; + + item->type = cJSON_String; + item->valuestring = (char*)output; + + input_buffer->offset = (size_t) (input_end - input_buffer->content); + input_buffer->offset++; + + return true; + +fail: + if (output != NULL) + { + input_buffer->hooks.deallocate(output); + output = NULL; + } + + if (input_pointer != NULL) + { + input_buffer->offset = (size_t)(input_pointer - input_buffer->content); + } + + return false; +} + +/* Render the cstring provided to an escaped version that can be printed. */ +static cJSON_bool print_string_ptr(const unsigned char * const input, printbuffer * const output_buffer) +{ + const unsigned char *input_pointer = NULL; + unsigned char *output = NULL; + unsigned char *output_pointer = NULL; + size_t output_length = 0; + /* numbers of additional characters needed for escaping */ + size_t escape_characters = 0; + + if (output_buffer == NULL) + { + return false; + } + + /* empty string */ + if (input == NULL) + { + output = ensure(output_buffer, sizeof("\"\"")); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "\"\""); + + return true; + } + + /* set "flag" to 1 if something needs to be escaped */ + for (input_pointer = input; *input_pointer; input_pointer++) + { + switch (*input_pointer) + { + case '\"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + /* one character escape sequence */ + escape_characters++; + break; + default: + if (*input_pointer < 32) + { + /* UTF-16 escape sequence uXXXX */ + escape_characters 
+= 5; + } + break; + } + } + output_length = (size_t)(input_pointer - input) + escape_characters; + + output = ensure(output_buffer, output_length + sizeof("\"\"")); + if (output == NULL) + { + return false; + } + + /* no characters have to be escaped */ + if (escape_characters == 0) + { + output[0] = '\"'; + memcpy(output + 1, input, output_length); + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; + } + + output[0] = '\"'; + output_pointer = output + 1; + /* copy the string */ + for (input_pointer = input; *input_pointer != '\0'; (void)input_pointer++, output_pointer++) + { + if ((*input_pointer > 31) && (*input_pointer != '\"') && (*input_pointer != '\\')) + { + /* normal character, copy */ + *output_pointer = *input_pointer; + } + else + { + /* character needs to be escaped */ + *output_pointer++ = '\\'; + switch (*input_pointer) + { + case '\\': + *output_pointer = '\\'; + break; + case '\"': + *output_pointer = '\"'; + break; + case '\b': + *output_pointer = 'b'; + break; + case '\f': + *output_pointer = 'f'; + break; + case '\n': + *output_pointer = 'n'; + break; + case '\r': + *output_pointer = 'r'; + break; + case '\t': + *output_pointer = 't'; + break; + default: + /* escape and print as unicode codepoint */ + sprintf((char*)output_pointer, "u%04x", *input_pointer); + output_pointer += 4; + break; + } + } + } + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; +} + +/* Invoke print_string_ptr (which is useful) on an item. */ +static cJSON_bool print_string(const cJSON * const item, printbuffer * const p) +{ + return print_string_ptr((unsigned char*)item->valuestring, p); +} + +/* Predeclare these prototypes. 
*/ +static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_array(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_object(const cJSON * const item, printbuffer * const output_buffer); + +/* Utility to jump whitespace and cr/lf */ +static parse_buffer *buffer_skip_whitespace(parse_buffer * const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL)) + { + return NULL; + } + + if (cannot_access_at_index(buffer, 0)) + { + return buffer; + } + + while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) + { + buffer->offset++; + } + + if (buffer->offset == buffer->length) + { + buffer->offset--; + } + + return buffer; +} + +/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer */ +static parse_buffer *skip_utf8_bom(parse_buffer * const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) + { + return NULL; + } + + if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) + { + buffer->offset += 3; + } + + return buffer; +} + +CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated) +{ + size_t buffer_length; + + if (NULL == value) + { + return NULL; + } + + /* Adding null character size due to require_null_terminated. */ + buffer_length = strlen(value) + sizeof(""); + + return cJSON_ParseWithLengthOpts(value, buffer_length, return_parse_end, require_null_terminated); +} + +/* Parse an object - create a new root, and populate. 
*/ +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, cJSON_bool require_null_terminated) +{ + parse_buffer buffer = { 0, 0, 0, 0, { 0, 0, 0 } }; + cJSON *item = NULL; + + /* reset error position */ + global_error.json = NULL; + global_error.position = 0; + + if (value == NULL || 0 == buffer_length) + { + goto fail; + } + + buffer.content = (const unsigned char*)value; + buffer.length = buffer_length; + buffer.offset = 0; + buffer.hooks = global_hooks; + + item = cJSON_New_Item(&global_hooks); + if (item == NULL) /* memory fail */ + { + goto fail; + } + + if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) + { + /* parse failure. ep is set. */ + goto fail; + } + + /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ + if (require_null_terminated) + { + buffer_skip_whitespace(&buffer); + if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') + { + goto fail; + } + } + if (return_parse_end) + { + *return_parse_end = (const char*)buffer_at_offset(&buffer); + } + + return item; + +fail: + if (item != NULL) + { + cJSON_Delete(item); + } + + if (value != NULL) + { + error local_error; + local_error.json = (const unsigned char*)value; + local_error.position = 0; + + if (buffer.offset < buffer.length) + { + local_error.position = buffer.offset; + } + else if (buffer.length > 0) + { + local_error.position = buffer.length - 1; + } + + if (return_parse_end != NULL) + { + *return_parse_end = (const char*)local_error.json + local_error.position; + } + + global_error = local_error; + } + + return NULL; +} + +/* Default options for cJSON_Parse */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value) +{ + return cJSON_ParseWithOpts(value, 0, 0); +} + +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLength(const char *value, size_t buffer_length) +{ + return cJSON_ParseWithLengthOpts(value, buffer_length, 0, 0); +} + +#define 
cjson_min(a, b) (((a) < (b)) ? (a) : (b)) + +static unsigned char *print(const cJSON * const item, cJSON_bool format, const internal_hooks * const hooks) +{ + static const size_t default_buffer_size = 256; + printbuffer buffer[1]; + unsigned char *printed = NULL; + + memset(buffer, 0, sizeof(buffer)); + + /* create buffer */ + buffer->buffer = (unsigned char*) hooks->allocate(default_buffer_size); + buffer->length = default_buffer_size; + buffer->format = format; + buffer->hooks = *hooks; + if (buffer->buffer == NULL) + { + goto fail; + } + + /* print the value */ + if (!print_value(item, buffer)) + { + goto fail; + } + update_offset(buffer); + + /* check if reallocate is available */ + if (hooks->reallocate != NULL) + { + printed = (unsigned char*) hooks->reallocate(buffer->buffer, buffer->offset + 1); + if (printed == NULL) { + goto fail; + } + buffer->buffer = NULL; + } + else /* otherwise copy the JSON over to a new buffer */ + { + printed = (unsigned char*) hooks->allocate(buffer->offset + 1); + if (printed == NULL) + { + goto fail; + } + memcpy(printed, buffer->buffer, cjson_min(buffer->length, buffer->offset + 1)); + printed[buffer->offset] = '\0'; /* just to be sure */ + + /* free the buffer */ + hooks->deallocate(buffer->buffer); + buffer->buffer = NULL; + } + + return printed; + +fail: + if (buffer->buffer != NULL) + { + hooks->deallocate(buffer->buffer); + buffer->buffer = NULL; + } + + if (printed != NULL) + { + hooks->deallocate(printed); + printed = NULL; + } + + return NULL; +} + +/* Render a cJSON item/entity/structure to text. 
*/ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item) +{ + return (char*)print(item, true, &global_hooks); +} + +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item) +{ + return (char*)print(item, false, &global_hooks); +} + +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt) +{ + printbuffer p = { 0, 0, 0, 0, 0, 0, { 0, 0, 0 } }; + + if (prebuffer < 0) + { + return NULL; + } + + p.buffer = (unsigned char*)global_hooks.allocate((size_t)prebuffer); + if (!p.buffer) + { + return NULL; + } + + p.length = (size_t)prebuffer; + p.offset = 0; + p.noalloc = false; + p.format = fmt; + p.hooks = global_hooks; + + if (!print_value(item, &p)) + { + global_hooks.deallocate(p.buffer); + p.buffer = NULL; + return NULL; + } + + return (char*)p.buffer; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format) +{ + printbuffer p = { 0, 0, 0, 0, 0, 0, { 0, 0, 0 } }; + + if ((length < 0) || (buffer == NULL)) + { + return false; + } + + p.buffer = (unsigned char*)buffer; + p.length = (size_t)length; + p.offset = 0; + p.noalloc = true; + p.format = format; + p.hooks = global_hooks; + + return print_value(item, &p); +} + +/* Parser core - when encountering text, process appropriately. 
*/
/* The literals null, false and true are matched first; every other value is
 * recognised by its first character and handed to the matching parser.
 * Returns false when there is no input or nothing matches. */
static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer)
{
    if ((input_buffer == NULL) || (input_buffer->content == NULL)) {
        return false; /* no input */
    }

    /* null */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0)) {
        item->type = cJSON_NULL;
        input_buffer->offset += 4;
        return true;
    }
    /* false */
    if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0)) {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }
    /* true */
    if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0)) {
        item->type = cJSON_True;
        item->valueint = 1;
        input_buffer->offset += 4;
        return true;
    }

    /* everything else needs at least one readable character */
    if (cannot_access_at_index(input_buffer, 0)) {
        return false;
    }

    switch (buffer_at_offset(input_buffer)[0]) {
        case '\"':
            return parse_string(item, input_buffer);

        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return parse_number(item, input_buffer);

        case '[':
            return parse_array(item, input_buffer);

        case '{':
            return parse_object(item, input_buffer);

        default:
            return false;
    }
}

/* Render a value to text.
*/
/* Dispatches on the low byte of item->type. The literals null, false and
 * true and raw values are written here; numbers, strings, arrays and objects
 * go to their own printers. Returns false for NULL arguments, unknown types,
 * a raw item without valuestring, or when the output cannot be grown. */
static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer)
{
    unsigned char *output = NULL;
    const char *literal = NULL;

    if ((item == NULL) || (output_buffer == NULL)) {
        return false;
    }

    switch ((item->type) & 0xFF) {
        case cJSON_NULL:
            literal = "null";
            break;

        case cJSON_False:
            literal = "false";
            break;

        case cJSON_True:
            literal = "true";
            break;

        case cJSON_Number:
            return print_number(item, output_buffer);

        case cJSON_Raw:
        {
            size_t raw_size = 0;
            if (item->valuestring == NULL) {
                return false;
            }

            /* raw text is copied as is, terminator included */
            raw_size = strlen(item->valuestring) + sizeof("");
            output = ensure(output_buffer, raw_size);
            if (output == NULL) {
                return false;
            }
            memcpy(output, item->valuestring, raw_size);
            return true;
        }

        case cJSON_String:
            return print_string(item, output_buffer);

        case cJSON_Array:
            return print_array(item, output_buffer);

        case cJSON_Object:
            return print_object(item, output_buffer);

        default:
            return false;
    }

    /* only the three literals get here: reserve the text plus '\0' and copy it */
    output = ensure(output_buffer, strlen(literal) + sizeof(""));
    if (output == NULL) {
        return false;
    }
    strcpy((char*)output, literal);
    return true;
}

/* Build an array from input text.
*/ +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* head of the linked list */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (buffer_at_offset(input_buffer)[0] != '[') + { + /* not an array */ + goto fail; + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']')) + { + /* empty array */ + goto success; + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse next value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } + while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']') + { + goto fail; /* expected end of array */ + } + +success: + input_buffer->depth--; + + if (head != NULL) { + head->prev = current_item; + } + + item->type = cJSON_Array; + item->child = head; + + input_buffer->offset++; + + return true; + +fail: + if 
(head != NULL) + { + cJSON_Delete(head); + } + + return false; +} + +/* Render an array to text */ +static cJSON_bool print_array(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output_pointer = NULL; + size_t length = 0; + cJSON *current_element = item->child; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output array. */ + /* opening square bracket */ + output_pointer = ensure(output_buffer, 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer = '['; + output_buffer->offset++; + output_buffer->depth++; + + while (current_element != NULL) + { + if (!print_value(current_element, output_buffer)) + { + return false; + } + update_offset(output_buffer); + if (current_element->next) + { + length = (size_t) (output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ','; + if(output_buffer->format) + { + *output_pointer++ = ' '; + } + *output_pointer = '\0'; + output_buffer->offset += length; + } + current_element = current_element->next; + } + + output_pointer = ensure(output_buffer, 2); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ']'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; +} + +/* Build an object from the text. 
*/ +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* linked list head */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{')) + { + goto fail; /* not an object */ + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}')) + { + goto success; /* empty object */ + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + if (cannot_access_at_index(input_buffer, 1)) + { + goto fail; /* nothing comes after the comma */ + } + + /* parse the name of the child */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_string(current_item, input_buffer)) + { + goto fail; /* failed to parse name */ + } + buffer_skip_whitespace(input_buffer); + + /* swap valuestring and string, because we parsed the name */ + current_item->string = current_item->valuestring; + current_item->valuestring = NULL; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':')) + { + goto fail; /* invalid object */ + } + 
+ /* parse the value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } + while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}')) + { + goto fail; /* expected end of object */ + } + +success: + input_buffer->depth--; + + if (head != NULL) { + head->prev = current_item; + } + + item->type = cJSON_Object; + item->child = head; + + input_buffer->offset++; + return true; + +fail: + if (head != NULL) + { + cJSON_Delete(head); + } + + return false; +} + +/* Render an object to text. */ +static cJSON_bool print_object(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output_pointer = NULL; + size_t length = 0; + cJSON *current_item = item->child; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output: */ + length = (size_t) (output_buffer->format ? 2 : 1); /* fmt: {\n */ + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer++ = '{'; + output_buffer->depth++; + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + output_buffer->offset += length; + + while (current_item) + { + if (output_buffer->format) + { + size_t i; + output_pointer = ensure(output_buffer, output_buffer->depth); + if (output_pointer == NULL) + { + return false; + } + for (i = 0; i < output_buffer->depth; i++) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += output_buffer->depth; + } + + /* print key */ + if (!print_string_ptr((unsigned char*)current_item->string, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + length = (size_t) (output_buffer->format ? 
2 : 1); + output_pointer = ensure(output_buffer, length); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ':'; + if (output_buffer->format) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += length; + + /* print value */ + if (!print_value(current_item, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + /* print comma if not last */ + length = ((size_t)(output_buffer->format ? 1 : 0) + (size_t)(current_item->next ? 1 : 0)); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + if (current_item->next) + { + *output_pointer++ = ','; + } + + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + *output_pointer = '\0'; + output_buffer->offset += length; + + current_item = current_item->next; + } + + output_pointer = ensure(output_buffer, output_buffer->format ? (output_buffer->depth + 1) : 2); + if (output_pointer == NULL) + { + return false; + } + if (output_buffer->format) + { + size_t i; + for (i = 0; i < (output_buffer->depth - 1); i++) + { + *output_pointer++ = '\t'; + } + } + *output_pointer++ = '}'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; +} + +/* Get Array size/item / object item. */ +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array) +{ + cJSON *child = NULL; + size_t size = 0; + + if (array == NULL) + { + return 0; + } + + child = array->child; + + while(child != NULL) + { + size++; + child = child->next; + } + + /* FIXME: Can overflow here. 
Cannot be fixed without breaking the API */ + + return (int)size; +} + +static cJSON* get_array_item(const cJSON *array, size_t index) +{ + cJSON *current_child = NULL; + + if (array == NULL) + { + return NULL; + } + + current_child = array->child; + while ((current_child != NULL) && (index > 0)) + { + index--; + current_child = current_child->next; + } + + return current_child; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index) +{ + if (index < 0) + { + return NULL; + } + + return get_array_item(array, (size_t)index); +} + +static cJSON *get_object_item(const cJSON * const object, const char * const name, const cJSON_bool case_sensitive) +{ + cJSON *current_element = NULL; + + if ((object == NULL) || (name == NULL)) + { + return NULL; + } + + current_element = object->child; + if (case_sensitive) + { + while ((current_element != NULL) && (current_element->string != NULL) && (strcmp(name, current_element->string) != 0)) + { + current_element = current_element->next; + } + } + else + { + while ((current_element != NULL) && (case_insensitive_strcmp((const unsigned char*)name, (const unsigned char*)(current_element->string)) != 0)) + { + current_element = current_element->next; + } + } + + if ((current_element == NULL) || (current_element->string == NULL)) { + return NULL; + } + + return current_element; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, false); +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string) +{ + return cJSON_GetObjectItem(object, string) ? 1 : 0; +} + +/* Utility for array list handling. 
*/ +static void suffix_object(cJSON *prev, cJSON *item) +{ + prev->next = item; + item->prev = prev; +} + +/* Utility for handling references. */ +static cJSON *create_reference(const cJSON *item, const internal_hooks * const hooks) +{ + cJSON *reference = NULL; + if (item == NULL) + { + return NULL; + } + + reference = cJSON_New_Item(hooks); + if (reference == NULL) + { + return NULL; + } + + memcpy(reference, item, sizeof(cJSON)); + reference->string = NULL; + reference->type |= cJSON_IsReference; + reference->next = reference->prev = NULL; + return reference; +} + +static cJSON_bool add_item_to_array(cJSON *array, cJSON *item) +{ + cJSON *child = NULL; + + if ((item == NULL) || (array == NULL) || (array == item)) + { + return false; + } + + child = array->child; + /* + * To find the last item in array quickly, we use prev in array + */ + if (child == NULL) + { + /* list is empty, start new one */ + array->child = item; + item->prev = item; + item->next = NULL; + } + else + { + /* append to the end */ + if (child->prev) + { + suffix_object(child->prev, item); + array->child->prev = item; + } + } + + return true; +} + +/* Add item to array/object. 
*/ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item) +{ + return add_item_to_array(array, item); +} + +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) + #pragma GCC diagnostic push +#endif +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif +/* helper function to cast away const */ +static void* cast_away_const(const void* string) +{ + return (void*)string; +} +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) + #pragma GCC diagnostic pop +#endif + + +static cJSON_bool add_item_to_object(cJSON * const object, const char * const string, cJSON * const item, const internal_hooks * const hooks, const cJSON_bool constant_key) +{ + char *new_key = NULL; + int new_type = cJSON_Invalid; + + if ((object == NULL) || (string == NULL) || (item == NULL) || (object == item)) + { + return false; + } + + if (constant_key) + { + new_key = (char*)cast_away_const(string); + new_type = item->type | cJSON_StringIsConst; + } + else + { + new_key = (char*)cJSON_strdup((const unsigned char*)string, hooks); + if (new_key == NULL) + { + return false; + } + + new_type = item->type & ~cJSON_StringIsConst; + } + + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + hooks->deallocate(item->string); + } + + item->string = new_key; + item->type = new_type; + + return add_item_to_array(object, item); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item) +{ + return add_item_to_object(object, string, item, &global_hooks, false); +} + +/* Add an item to an object with constant string as key */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item) +{ + return add_item_to_object(object, string, item, &global_hooks, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) +{ + if (array == 
NULL) + { + return false; + } + + return add_item_to_array(array, create_reference(item, &global_hooks)); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item) +{ + if ((object == NULL) || (string == NULL)) + { + return false; + } + + return add_item_to_object(object, string, create_reference(item, &global_hooks), &global_hooks, false); +} + +CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name) +{ + cJSON *null = cJSON_CreateNull(); + if (add_item_to_object(object, name, null, &global_hooks, false)) + { + return null; + } + + cJSON_Delete(null); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name) +{ + cJSON *true_item = cJSON_CreateTrue(); + if (add_item_to_object(object, name, true_item, &global_hooks, false)) + { + return true_item; + } + + cJSON_Delete(true_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name) +{ + cJSON *false_item = cJSON_CreateFalse(); + if (add_item_to_object(object, name, false_item, &global_hooks, false)) + { + return false_item; + } + + cJSON_Delete(false_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean) +{ + cJSON *bool_item = cJSON_CreateBool(boolean); + if (add_item_to_object(object, name, bool_item, &global_hooks, false)) + { + return bool_item; + } + + cJSON_Delete(bool_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number) +{ + cJSON *number_item = cJSON_CreateNumber(number); + if (add_item_to_object(object, name, number_item, &global_hooks, false)) + { + return number_item; + } + + cJSON_Delete(number_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const 
string) +{ + cJSON *string_item = cJSON_CreateString(string); + if (add_item_to_object(object, name, string_item, &global_hooks, false)) + { + return string_item; + } + + cJSON_Delete(string_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw) +{ + cJSON *raw_item = cJSON_CreateRaw(raw); + if (add_item_to_object(object, name, raw_item, &global_hooks, false)) + { + return raw_item; + } + + cJSON_Delete(raw_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name) +{ + cJSON *object_item = cJSON_CreateObject(); + if (add_item_to_object(object, name, object_item, &global_hooks, false)) + { + return object_item; + } + + cJSON_Delete(object_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name) +{ + cJSON *array = cJSON_CreateArray(); + if (add_item_to_object(object, name, array, &global_hooks, false)) + { + return array; + } + + cJSON_Delete(array); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item) +{ + if ((parent == NULL) || (item == NULL)) + { + return NULL; + } + + if (item != parent->child) + { + /* not the first element */ + item->prev->next = item->next; + } + if (item->next != NULL) + { + /* not the last element */ + item->next->prev = item->prev; + } + + if (item == parent->child) + { + /* first element */ + parent->child = item->next; + } + else if (item->next == NULL) + { + /* last element */ + parent->child->prev = item->prev; + } + + /* make sure the detached item doesn't point anywhere anymore */ + item->prev = NULL; + item->next = NULL; + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which) +{ + if (which < 0) + { + return NULL; + } + + return cJSON_DetachItemViaPointer(array, get_array_item(array, (size_t)which)); +} + +CJSON_PUBLIC(void) 
cJSON_DeleteItemFromArray(cJSON *array, int which) +{ + cJSON_Delete(cJSON_DetachItemFromArray(array, which)); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItem(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItemCaseSensitive(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObject(object, string)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObjectCaseSensitive(object, string)); +} + +/* Replace array/object items with new ones. */ +CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem) +{ + cJSON *after_inserted = NULL; + + if (which < 0 || newitem == NULL) + { + return false; + } + + after_inserted = get_array_item(array, (size_t)which); + if (after_inserted == NULL) + { + return add_item_to_array(array, newitem); + } + + if (after_inserted != array->child && after_inserted->prev == NULL) { + /* return false if after_inserted is a corrupted array item */ + return false; + } + + newitem->next = after_inserted; + newitem->prev = after_inserted->prev; + after_inserted->prev = newitem; + if (after_inserted == array->child) + { + array->child = newitem; + } + else + { + newitem->prev->next = newitem; + } + return true; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement) +{ + if ((parent == NULL) || (parent->child == NULL) || (replacement == NULL) || (item == NULL)) + { + return false; + } + + if (replacement == item) + { + return true; + } + + replacement->next = item->next; 
+ replacement->prev = item->prev; + + if (replacement->next != NULL) + { + replacement->next->prev = replacement; + } + if (parent->child == item) + { + if (parent->child->prev == parent->child) + { + replacement->prev = replacement; + } + parent->child = replacement; + } + else + { /* + * To find the last item in array quickly, we use prev in array. + * We can't modify the last item's next pointer where this item was the parent's child + */ + if (replacement->prev != NULL) + { + replacement->prev->next = replacement; + } + if (replacement->next == NULL) + { + parent->child->prev = replacement; + } + } + + item->next = NULL; + item->prev = NULL; + cJSON_Delete(item); + + return true; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem) +{ + if (which < 0) + { + return false; + } + + return cJSON_ReplaceItemViaPointer(array, get_array_item(array, (size_t)which), newitem); +} + +static cJSON_bool replace_item_in_object(cJSON *object, const char *string, cJSON *replacement, cJSON_bool case_sensitive) +{ + if ((replacement == NULL) || (string == NULL)) + { + return false; + } + + /* replace the name in the replacement */ + if (!(replacement->type & cJSON_StringIsConst) && (replacement->string != NULL)) + { + cJSON_free(replacement->string); + } + replacement->string = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + if (replacement->string == NULL) + { + return false; + } + + replacement->type &= ~cJSON_StringIsConst; + + return cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem) +{ + return replace_item_in_object(object, string, newitem, false); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem) +{ + return replace_item_in_object(object, string, newitem, true); +} + +/* Create basic 
types: */ +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_NULL; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_True; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = boolean ? cJSON_True : cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_Number; + item->valuedouble = num; + + /* use saturation in case of overflow */ + if (num >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (num <= (double)INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)num; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_String; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + if(!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) + { + item->type = cJSON_String | cJSON_IsReference; + item->valuestring = (char*)cast_away_const(string); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Object | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; 
+} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child) { + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Array | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type = cJSON_Raw; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)raw, &global_hooks); + if(!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if(item) + { + item->type=cJSON_Array; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Object; + } + + return item; +} + +/* Create Arrays: */ +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for(i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if(!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for(i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber((double)numbers[i]); + if(!n) + { + cJSON_Delete(a); + return NULL; + } + if(!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + 
a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for(i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if(!n) + { + cJSON_Delete(a); + return NULL; + } + if(!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char *const *strings, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (strings == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateString(strings[i]); + if(!n) + { + cJSON_Delete(a); + return NULL; + } + if(!i) + { + a->child = n; + } + else + { + suffix_object(p,n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +/* Duplication */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse) +{ + cJSON *newitem = NULL; + cJSON *child = NULL; + cJSON *next = NULL; + cJSON *newchild = NULL; + + /* Bail on bad ptr */ + if (!item) + { + goto fail; + } + /* Create new item */ + newitem = cJSON_New_Item(&global_hooks); + if (!newitem) + { + goto fail; + } + /* Copy over all vars */ + newitem->type = item->type & (~cJSON_IsReference); + newitem->valueint = item->valueint; + newitem->valuedouble = item->valuedouble; + if (item->valuestring) + { + newitem->valuestring = (char*)cJSON_strdup((unsigned char*)item->valuestring, &global_hooks); + if (!newitem->valuestring) + { + goto fail; + } + } + if (item->string) + { + newitem->string = (item->type&cJSON_StringIsConst) ? 
item->string : (char*)cJSON_strdup((unsigned char*)item->string, &global_hooks); + if (!newitem->string) + { + goto fail; + } + } + /* If non-recursive, then we're done! */ + if (!recurse) + { + return newitem; + } + /* Walk the ->next chain for the child. */ + child = item->child; + while (child != NULL) + { + newchild = cJSON_Duplicate(child, true); /* Duplicate (with recurse) each item in the ->next chain */ + if (!newchild) + { + goto fail; + } + if (next != NULL) + { + /* If newitem->child already set, then crosswire ->prev and ->next and move on */ + next->next = newchild; + newchild->prev = next; + next = newchild; + } + else + { + /* Set newitem->child and move to it */ + newitem->child = newchild; + next = newchild; + } + child = child->next; + } + if (newitem && newitem->child) + { + newitem->child->prev = newchild; + } + + return newitem; + +fail: + if (newitem != NULL) + { + cJSON_Delete(newitem); + } + + return NULL; +} + +static void skip_oneline_comment(char **input) +{ + *input += static_strlen("//"); + + for (; (*input)[0] != '\0'; ++(*input)) + { + if ((*input)[0] == '\n') { + *input += static_strlen("\n"); + return; + } + } +} + +static void skip_multiline_comment(char **input) +{ + *input += static_strlen("/*"); + + for (; (*input)[0] != '\0'; ++(*input)) + { + if (((*input)[0] == '*') && ((*input)[1] == '/')) + { + *input += static_strlen("*/"); + return; + } + } +} + +static void minify_string(char **input, char **output) { + (*output)[0] = (*input)[0]; + *input += static_strlen("\""); + *output += static_strlen("\""); + + + for (; (*input)[0] != '\0'; (void)++(*input), ++(*output)) { + (*output)[0] = (*input)[0]; + + if ((*input)[0] == '\"') { + (*output)[0] = '\"'; + *input += static_strlen("\""); + *output += static_strlen("\""); + return; + } else if (((*input)[0] == '\\') && ((*input)[1] == '\"')) { + (*output)[1] = (*input)[1]; + *input += static_strlen("\""); + *output += static_strlen("\""); + } + } +} + +CJSON_PUBLIC(void) 
cJSON_Minify(char *json) +{ + char *into = json; + + if (json == NULL) + { + return; + } + + while (json[0] != '\0') + { + switch (json[0]) + { + case ' ': + case '\t': + case '\r': + case '\n': + json++; + break; + + case '/': + if (json[1] == '/') + { + skip_oneline_comment(&json); + } + else if (json[1] == '*') + { + skip_multiline_comment(&json); + } else { + json++; + } + break; + + case '\"': + minify_string(&json, (char**)&into); + break; + + default: + into[0] = json[0]; + json++; + into++; + } + } + + /* and null-terminate. */ + *into = '\0'; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Invalid; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_False; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xff) == cJSON_True; +} + + +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & (cJSON_True | cJSON_False)) != 0; +} +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_NULL; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Number; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_String; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Array; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item) +{ + if (item == NULL) + { + 
return false; + } + + return (item->type & 0xFF) == cJSON_Object; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Raw; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive) +{ + if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF))) + { + return false; + } + + /* check if type is valid */ + switch (a->type & 0xFF) + { + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + case cJSON_Number: + case cJSON_String: + case cJSON_Raw: + case cJSON_Array: + case cJSON_Object: + break; + + default: + return false; + } + + /* identical objects are equal */ + if (a == b) + { + return true; + } + + switch (a->type & 0xFF) + { + /* in these cases and equal type is enough */ + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + return true; + + case cJSON_Number: + if (compare_double(a->valuedouble, b->valuedouble)) + { + return true; + } + return false; + + case cJSON_String: + case cJSON_Raw: + if ((a->valuestring == NULL) || (b->valuestring == NULL)) + { + return false; + } + if (strcmp(a->valuestring, b->valuestring) == 0) + { + return true; + } + + return false; + + case cJSON_Array: + { + cJSON *a_element = a->child; + cJSON *b_element = b->child; + + for (; (a_element != NULL) && (b_element != NULL);) + { + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + + a_element = a_element->next; + b_element = b_element->next; + } + + /* one of the arrays is longer than the other */ + if (a_element != b_element) { + return false; + } + + return true; + } + + case cJSON_Object: + { + cJSON *a_element = NULL; + cJSON *b_element = NULL; + cJSON_ArrayForEach(a_element, a) + { + /* TODO This has O(n^2) runtime, which is horrible! 
*/ + b_element = get_object_item(b, a_element->string, case_sensitive); + if (b_element == NULL) + { + return false; + } + + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + } + + /* doing this twice, once on a and b to prevent true comparison if a subset of b + * TODO: Do this the proper way, this is just a fix for now */ + cJSON_ArrayForEach(b_element, b) + { + a_element = get_object_item(a, b_element->string, case_sensitive); + if (a_element == NULL) + { + return false; + } + + if (!cJSON_Compare(b_element, a_element, case_sensitive)) + { + return false; + } + } + + return true; + } + + default: + return false; + } +} + +CJSON_PUBLIC(void *) cJSON_malloc(size_t size) +{ + return global_hooks.allocate(size); +} + +CJSON_PUBLIC(void) cJSON_free(void *object) +{ + global_hooks.deallocate(object); + object = NULL; +} diff --git a/externals/cjson/cJSON.h b/externals/cjson/cJSON.h new file mode 100644 index 0000000..88cf0bc --- /dev/null +++ b/externals/cjson/cJSON.h @@ -0,0 +1,300 @@ +/* + Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifndef cJSON__h +#define cJSON__h + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) +#define __WINDOWS__ +#endif + +#ifdef __WINDOWS__ + +/* When compiling for windows, we specify a specific calling convention to avoid issues where we are being called from a project with a different default calling convention. For windows you have 3 define options: + +CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols +CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default) +CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol + +For *nix builds that support visibility attribute, you can define similar behavior by + +setting default visibility to hidden by adding +-fvisibility=hidden (for gcc) +or +-xldscope=hidden (for sun cc) +to CFLAGS + +then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does + +*/ + +#define CJSON_CDECL __cdecl +#define CJSON_STDCALL __stdcall + +/* export symbols by default, this is necessary for copy pasting the C and header file */ +#if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_EXPORT_SYMBOLS +#endif + +#if defined(CJSON_HIDE_SYMBOLS) +#define CJSON_PUBLIC(type) type CJSON_STDCALL +#elif defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllexport) type CJSON_STDCALL +#elif defined(CJSON_IMPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllimport) type CJSON_STDCALL +#endif +#else /* !__WINDOWS__ */ +#define CJSON_CDECL +#define CJSON_STDCALL + +#if 
(defined(__GNUC__) || defined(__SUNPRO_CC) || defined (__SUNPRO_C)) && defined(CJSON_API_VISIBILITY) +#define CJSON_PUBLIC(type) __attribute__((visibility("default"))) type +#else +#define CJSON_PUBLIC(type) type +#endif +#endif + +/* project version */ +#define CJSON_VERSION_MAJOR 1 +#define CJSON_VERSION_MINOR 7 +#define CJSON_VERSION_PATCH 18 + +#include + +/* cJSON Types: */ +#define cJSON_Invalid (0) +#define cJSON_False (1 << 0) +#define cJSON_True (1 << 1) +#define cJSON_NULL (1 << 2) +#define cJSON_Number (1 << 3) +#define cJSON_String (1 << 4) +#define cJSON_Array (1 << 5) +#define cJSON_Object (1 << 6) +#define cJSON_Raw (1 << 7) /* raw json */ + +#define cJSON_IsReference 256 +#define cJSON_StringIsConst 512 + +/* The cJSON structure: */ +typedef struct cJSON +{ + /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */ + struct cJSON *next; + struct cJSON *prev; + /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ + struct cJSON *child; + + /* The type of the item, as above. */ + int type; + + /* The item's string, if type==cJSON_String and type == cJSON_Raw */ + char *valuestring; + /* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */ + int valueint; + /* The item's number, if type==cJSON_Number */ + double valuedouble; + + /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ + char *string; +} cJSON; + +typedef struct cJSON_Hooks +{ + /* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. */ + void *(CJSON_CDECL *malloc_fn)(size_t sz); + void (CJSON_CDECL *free_fn)(void *ptr); +} cJSON_Hooks; + +typedef int cJSON_bool; + +/* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them. + * This is to prevent stack overflows. 
*/ +#ifndef CJSON_NESTING_LIMIT +#define CJSON_NESTING_LIMIT 1000 +#endif + +/* returns the version of cJSON as a string */ +CJSON_PUBLIC(const char*) cJSON_Version(void); + +/* Supply malloc, realloc and free functions to cJSON */ +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks); + +/* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */ +/* Supply a block of JSON, and this returns a cJSON object you can interrogate. */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value); +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLength(const char *value, size_t buffer_length); +/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */ +/* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to the error so will match cJSON_GetErrorPtr(). */ +CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated); +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, cJSON_bool require_null_terminated); + +/* Render a cJSON entity to text for transfer/storage. */ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item); +/* Render a cJSON entity to text for transfer/storage without any formatting. */ +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item); +/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. 
fmt=0 gives unformatted, =1 gives formatted */ +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt); +/* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on success and 0 on failure. */ +/* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 bytes more than you actually need */ +CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format); +/* Delete a cJSON entity and all subentities. */ +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item); + +/* Returns the number of items in an array (or object). */ +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array); +/* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */ +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index); +/* Get item "string" from object. Case insensitive. */ +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string); +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string); +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string); +/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. 
*/ +CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void); + +/* Check item type and return its value */ +CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item); +CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item); + +/* These functions check the type of an item */ +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item); + +/* These calls create a cJSON item of the appropriate type. 
*/ +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean); +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num); +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string); +/* raw json */ +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw); +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void); + +/* Create a string where valuestring references a string so + * it will not be freed by cJSON_Delete */ +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string); +/* Create an object/array that only references it's elements so + * they will not be freed by cJSON_Delete */ +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child); +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child); + +/* These utilities create an Array of count items. + * The parameter count cannot be greater than the number of elements in the number array, otherwise array access will be out of bounds.*/ +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char *const *strings, int count); + +/* Append item to the specified array/object. */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item); +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item); +/* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object. 
+ * WARNING: When this function was used, make sure to always check that (item->type & cJSON_StringIsConst) is zero before + * writing to `item->string` */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item); +/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item); + +/* Remove/Detach items from Arrays/Objects. */ +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which); +CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string); +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string); +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string); + +/* Update array items. */ +CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem); /* Shifts pre-existing items to the right. 
*/ +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement); +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem); +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem); +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object,const char *string,cJSON *newitem); + +/* Duplicate a cJSON item */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse); +/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will + * need to be released. With recurse!=0, it will duplicate any children connected to the item. + * The item->next and ->prev pointers are always zero on return from Duplicate. */ +/* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be considered unequal. + * case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */ +CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive); + +/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') from strings. + * The input pointer json cannot point to a read-only address area, such as a string constant, + * but should point to a readable and writable address area. */ +CJSON_PUBLIC(void) cJSON_Minify(char *json); + +/* Helper functions for creating and adding items to an object at the same time. + * They return the added item or NULL on failure. 
*/ +CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name); +CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name); +CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name); +CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean); +CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number); +CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string); +CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw); +CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name); +CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name); + +/* When assigning an integer value, it needs to be propagated to valuedouble too. */ +#define cJSON_SetIntValue(object, number) ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number)) +/* helper for the cJSON_SetNumberValue macro */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number); +#define cJSON_SetNumberValue(object, number) ((object != NULL) ? cJSON_SetNumberHelper(object, (double)number) : (number)) +/* Change the valuestring of a cJSON_String object, only takes effect when type of object is cJSON_String */ +CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring); + +/* If the object is not a boolean type this does nothing and returns cJSON_Invalid else it returns the new type*/ +#define cJSON_SetBoolValue(object, boolValue) ( \ + (object != NULL && ((object)->type & (cJSON_False|cJSON_True))) ? 
\ + (object)->type=((object)->type &(~(cJSON_False|cJSON_True)))|((boolValue)?cJSON_True:cJSON_False) : \ + cJSON_Invalid\ +) + +/* Macro for iterating over an array or object */ +#define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? (array)->child : NULL; element != NULL; element = element->next) + +/* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */ +CJSON_PUBLIC(void *) cJSON_malloc(size_t size); +CJSON_PUBLIC(void) cJSON_free(void *object); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/mk/config.mk b/mk/config.mk index 9b7067f..81b4e69 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -16,7 +16,8 @@ endif # Exclude native macOS test files from cross-compilation NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c \ - tests/test-oci-digest.c tests/test-oci-blob-store.c + tests/test-oci-digest.c tests/test-oci-blob-store.c \ + tests/test-oci-manifest.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index fc2f935..6dcfe4f 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -6,7 +6,7 @@ test-glibc-coreutils test-perf \ test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \ test-full test-multi-vcpu test-rwx \ - test-oci-ref test-oci-digest test-oci-blob-store \ + test-oci-ref test-oci-digest test-oci-blob-store test-oci-manifest \ test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ @@ -39,6 +39,8 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-oci-digest @printf "\n$(BLUE)━━━ OCI blob store unit tests ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-oci-blob-store + @printf "\n$(BLUE)━━━ OCI manifest parser unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-manifest ## Run the OCI image reference 
parser unit tests (native, no HVF) test-oci-ref: $(BUILD_DIR)/test-oci-ref @@ -52,6 +54,10 @@ test-oci-digest: $(BUILD_DIR)/test-oci-digest test-oci-blob-store: $(BUILD_DIR)/test-oci-blob-store @$(BUILD_DIR)/test-oci-blob-store +## Run the OCI manifest / index / config parser unit tests (native, no HVF) +test-oci-manifest: $(BUILD_DIR)/test-oci-manifest + @$(BUILD_DIR)/test-oci-manifest + test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ trap 'rm -rf "$$tmpdir"; rm -f /tmp/elfuse-sysroot-rename-dst.txt' EXIT; \ diff --git a/src/oci/manifest.c b/src/oci/manifest.c new file mode 100644 index 0000000..6022112 --- /dev/null +++ b/src/oci/manifest.c @@ -0,0 +1,707 @@ +/* OCI image manifest, image index, and image config parsers + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "manifest.h" + +#include +#include +#include +#include + +#include "../../externals/cjson/cJSON.h" + +/* Maximum representable size that can survive a double round-trip without + * silent precision loss. JSON numbers parse through double in cJSON, so any + * size beyond 2^53 - 1 would already be off by ones; sizes well below that + * cover every realistic OCI layer. + */ +#define SIZE_MAX_SAFE INT64_C(0x1fffffffffffff) + +/* Optional string helper. JSON value may be absent (NULL item) or string + * type. Returns 1 on accepted (out_dup may be empty on success when allow + * is true), 0 on absent (out_dup left as default), -1 on type error. The + * caller owns the returned string. 
+ */ +static int dup_optional_string(const cJSON *parent, + const char *key, + char **out_dup, + const char **err_msg, + const char *type_err) +{ + const cJSON *item = cJSON_GetObjectItemCaseSensitive(parent, key); + if (!item) + return 0; + if (!cJSON_IsString(item) || !item->valuestring) { + if (err_msg) + *err_msg = type_err; + return -1; + } + char *dup = strdup(item->valuestring); + if (!dup) { + if (err_msg) + *err_msg = "out of memory copying string field"; + return -1; + } + free(*out_dup); + *out_dup = dup; + return 1; +} + +static int require_string(const cJSON *parent, + const char *key, + char **out_dup, + const char **err_msg, + const char *missing_msg, + const char *type_msg) +{ + const cJSON *item = cJSON_GetObjectItemCaseSensitive(parent, key); + if (!item) { + if (err_msg) + *err_msg = missing_msg; + return -1; + } + if (!cJSON_IsString(item) || !item->valuestring) { + if (err_msg) + *err_msg = type_msg; + return -1; + } + char *dup = strdup(item->valuestring); + if (!dup) { + if (err_msg) + *err_msg = "out of memory copying required string"; + return -1; + } + free(*out_dup); + *out_dup = dup; + return 0; +} + +/* Convert a JSON string array into a NULL-terminated char** array. Returns + * 0 on success, -1 on type error or allocation failure. On absent field + * the function returns 1 and leaves *out_array untouched. 
+ */
+static int dup_string_array(const cJSON *parent,
+                            const char *key,
+                            char ***out_array,
+                            const char **err_msg,
+                            const char *type_msg,
+                            bool required)
+{
+    const cJSON *item = cJSON_GetObjectItemCaseSensitive(parent, key);
+    /* Treat an explicit JSON null exactly like a missing key. Encoders that
+     * marshal an unset slice as null (Go's encoding/json does) produce image
+     * configs carrying "Entrypoint": null or "Cmd": null, and rejecting
+     * those as a type error would fail the whole config parse. A required
+     * field is still an error in both cases.
+     */
+    if (!item || cJSON_IsNull(item)) {
+        if (required) {
+            if (err_msg)
+                *err_msg = type_msg;
+            return -1;
+        }
+        return 1;
+    }
+    if (!cJSON_IsArray(item)) {
+        if (err_msg)
+            *err_msg = type_msg;
+        return -1;
+    }
+    int n = cJSON_GetArraySize(item);
+    if (n < 0)
+        n = 0;
+    /* One extra zeroed slot is the NULL terminator; calloc also keeps the
+     * fail path safe because unfilled entries are NULL when freed.
+     */
+    char **arr = calloc((size_t) n + 1, sizeof(*arr));
+    if (!arr) {
+        if (err_msg)
+            *err_msg = "out of memory allocating string array";
+        return -1;
+    }
+    for (int i = 0; i < n; i++) {
+        const cJSON *elem = cJSON_GetArrayItem(item, i);
+        if (!cJSON_IsString(elem) || !elem->valuestring) {
+            if (err_msg)
+                *err_msg = type_msg;
+            goto fail;
+        }
+        arr[i] = strdup(elem->valuestring);
+        if (!arr[i]) {
+            if (err_msg)
+                *err_msg = "out of memory copying string-array element";
+            goto fail;
+        }
+    }
+    arr[n] = NULL;
+    /* Free any prior value before publishing the new one. */
+    if (*out_array) {
+        for (char **p = *out_array; *p; p++)
+            free(*p);
+        free(*out_array);
+    }
+    *out_array = arr;
+    return 0;
+fail:
+    /* *out_array is deliberately left untouched on failure. */
+    for (int i = 0; i < n; i++)
+        free(arr[i]);
+    free(arr);
+    return -1;
+}
+
+/* Parse a non-negative integer-valued JSON number. cJSON keeps numbers in
+ * a double so the practical upper bound is 2^53 - 1; OCI layer sizes are
+ * well below that.
+ */ +static int parse_size_field(const cJSON *parent, + const char *key, + int64_t *out, + const char **err_msg) +{ + const cJSON *item = cJSON_GetObjectItemCaseSensitive(parent, key); + if (!item) { + if (err_msg) + *err_msg = "descriptor missing size field"; + return -1; + } + if (!cJSON_IsNumber(item)) { + if (err_msg) + *err_msg = "descriptor size field is not a number"; + return -1; + } + double v = item->valuedouble; + if (!(v >= 0.0) || v > (double) SIZE_MAX_SAFE) { + if (err_msg) + *err_msg = "descriptor size out of representable range"; + return -1; + } + /* Round-trip check: the JSON number must already be an integer. The + * double-to-int64 cast truncates; reject anything with a fractional part + * before truncation hides the divergence. + */ + int64_t as_int = (int64_t) v; + if ((double) as_int != v) { + if (err_msg) + *err_msg = "descriptor size field is not an integer"; + return -1; + } + *out = as_int; + return 0; +} + +static int parse_descriptor(const cJSON *obj, + oci_descriptor_t *out, + const char **err_msg) +{ + memset(out, 0, sizeof(*out)); + + /* mediaType: optional per OCI image-spec (some legacy responses omit it + * on the implicit root), but every descriptor that lives inside another + * document does carry it. Treat it as required at parse time and let + * the caller relax it for the top-level document if needed. 
+ */ + char *raw_mt = NULL; + if (require_string(obj, "mediaType", &raw_mt, err_msg, + "descriptor missing mediaType", + "descriptor mediaType must be a string") < 0) + goto fail; + out->raw_media_type = raw_mt; + out->media_type = oci_media_type_parse(raw_mt); + + if (require_string(obj, "digest", &out->digest_str, err_msg, + "descriptor missing digest", + "descriptor digest must be a string") < 0) + goto fail; + if (!oci_digest_parse(out->digest_str, &out->algo, out->hex)) { + if (err_msg) + *err_msg = "descriptor digest is malformed or not lowercase"; + goto fail; + } + + if (parse_size_field(obj, "size", &out->size, err_msg) < 0) + goto fail; + return 0; +fail: + oci_descriptor_free(out); + return -1; +} + +static int parse_platform(const cJSON *obj, + oci_platform_t *out, + const char **err_msg) +{ + memset(out, 0, sizeof(*out)); + if (!obj || !cJSON_IsObject(obj)) { + if (err_msg) + *err_msg = "platform field missing or not an object"; + return -1; + } + if (require_string(obj, "architecture", &out->architecture, err_msg, + "platform missing architecture", + "platform architecture must be a string") < 0) + goto fail; + if (require_string(obj, "os", &out->os, err_msg, + "platform missing os", + "platform os must be a string") < 0) + goto fail; + + /* variant and os.version default to "" so callers can compare without + * NULL checks. dup_optional_string sets the field only when present. 
+ */ + if (dup_optional_string(obj, "variant", &out->variant, err_msg, + "platform variant must be a string") < 0) + goto fail; + if (!out->variant) { + out->variant = strdup(""); + if (!out->variant) { + if (err_msg) + *err_msg = "out of memory defaulting variant"; + goto fail; + } + } + if (dup_optional_string(obj, "os.version", &out->os_version, err_msg, + "platform os.version must be a string") < 0) + goto fail; + if (!out->os_version) { + out->os_version = strdup(""); + if (!out->os_version) { + if (err_msg) + *err_msg = "out of memory defaulting os.version"; + goto fail; + } + } + return 0; +fail: + oci_platform_free(out); + return -1; +} + +static int parse_int_field(const cJSON *parent, + const char *key, + int *out, + bool required, + const char **err_msg, + const char *missing_msg, + const char *type_msg) +{ + const cJSON *item = cJSON_GetObjectItemCaseSensitive(parent, key); + if (!item) { + if (required) { + if (err_msg) + *err_msg = missing_msg; + return -1; + } + return 1; + } + if (!cJSON_IsNumber(item)) { + if (err_msg) + *err_msg = type_msg; + return -1; + } + *out = item->valueint; + return 0; +} + +/* Convert a cJSON parse failure into our diagnostic message space. cJSON's + * cJSON_GetErrorPtr is process-global; the message we set is static and the + * caller never frees it. 
+ */ +static void set_parse_err(const char **err_msg, const char *fallback) +{ + if (err_msg) + *err_msg = fallback; + errno = EINVAL; +} + +int oci_manifest_parse(const char *json, + size_t len, + oci_manifest_t *out, + const char **err_msg) +{ + if (!json || !out) { + set_parse_err(err_msg, "oci_manifest_parse: NULL input"); + return -1; + } + memset(out, 0, sizeof(*out)); + + cJSON *root = cJSON_ParseWithLength(json, len); + if (!root) { + set_parse_err(err_msg, "manifest JSON is malformed"); + return -1; + } + if (!cJSON_IsObject(root)) { + set_parse_err(err_msg, "manifest JSON root is not an object"); + goto fail; + } + + if (parse_int_field(root, "schemaVersion", &out->schema_version, true, + err_msg, "manifest missing schemaVersion", + "manifest schemaVersion must be a number") < 0) + goto fail; + if (out->schema_version != 2) { + set_parse_err(err_msg, "manifest schemaVersion must be 2"); + goto fail; + } + + /* mediaType on the manifest itself is optional in some Docker responses + * (the Content-Type header is canonical there); record raw and parsed + * forms but do not reject on absence. + */ + if (dup_optional_string(root, "mediaType", &out->raw_media_type, err_msg, + "manifest mediaType must be a string") < 0) + goto fail; + out->media_type = out->raw_media_type + ? 
oci_media_type_parse(out->raw_media_type)
+                          : OCI_MT_UNKNOWN;
+
+    /* The config descriptor is mandatory and must carry a config media
+     * type; out->config is released by oci_manifest_free() on the fail path.
+     */
+    const cJSON *cfg = cJSON_GetObjectItemCaseSensitive(root, "config");
+    if (!cfg || !cJSON_IsObject(cfg)) {
+        set_parse_err(err_msg, "manifest config descriptor missing");
+        goto fail;
+    }
+    if (parse_descriptor(cfg, &out->config, err_msg) < 0)
+        goto fail;
+    if (!oci_media_type_is_config(out->config.media_type)) {
+        set_parse_err(err_msg, "manifest config has non-config media type");
+        goto fail;
+    }
+
+    const cJSON *layers = cJSON_GetObjectItemCaseSensitive(root, "layers");
+    if (!layers || !cJSON_IsArray(layers)) {
+        set_parse_err(err_msg, "manifest layers array missing");
+        goto fail;
+    }
+    int nlayers = cJSON_GetArraySize(layers);
+    if (nlayers < 0)
+        nlayers = 0;
+    if (nlayers > 0) {
+        out->layers = calloc((size_t) nlayers, sizeof(*out->layers));
+        if (!out->layers) {
+            set_parse_err(err_msg, "out of memory allocating layer array");
+            errno = ENOMEM;
+            goto fail;
+        }
+    }
+    for (int i = 0; i < nlayers; i++) {
+        const cJSON *desc = cJSON_GetArrayItem(layers, i);
+        if (!cJSON_IsObject(desc)) {
+            set_parse_err(err_msg, "manifest layer entry is not an object");
+            goto fail;
+        }
+        if (parse_descriptor(desc, &out->layers[out->nlayers], err_msg) < 0)
+            goto fail;
+        /* Count the slot as soon as it owns heap strings. The fail path's
+         * oci_manifest_free() only releases layers[0..nlayers), so bumping
+         * the counter after the media-type checks would leak this
+         * descriptor whenever one of them rejects the layer.
+         */
+        oci_media_type_t lmt = out->layers[out->nlayers++].media_type;
+        if (!oci_media_type_is_layer(lmt)) {
+            set_parse_err(err_msg,
+                          "manifest layer has non-layer media type");
+            goto fail;
+        }
+        if (oci_media_type_is_foreign(lmt)) {
+            set_parse_err(err_msg,
+                          "manifest references foreign (nondistributable) "
+                          "layer; not supported");
+            goto fail;
+        }
+        if (!oci_media_type_is_layer_supported(lmt)) {
+            set_parse_err(err_msg,
+                          "manifest layer media type is not supported "
+                          "(only tar / tar+gzip / tar+zstd)");
+            goto fail;
+        }
+    }
+
+    cJSON_Delete(root);
+    return 0;
+fail:
+    /* oci_manifest_free() zeroes *out, so callers never see a partially
+     * populated manifest.
+     */
+    cJSON_Delete(root);
+    oci_manifest_free(out);
+    return -1;
+}
+
+int oci_index_parse(const char *json,
+                    size_t len,
+                    oci_index_t *out,
+                    const char
**err_msg)
+{
+    /* Parses an image index document of len bytes into *out. Returns 0 on
+     * success; on failure returns -1, stores a static diagnostic in
+     * *err_msg (when err_msg is non-NULL), and leaves *out zeroed.
+     */
+    if (!json || !out) {
+        set_parse_err(err_msg, "oci_index_parse: NULL input");
+        return -1;
+    }
+    memset(out, 0, sizeof(*out));
+
+    cJSON *root = cJSON_ParseWithLength(json, len);
+    if (!root) {
+        set_parse_err(err_msg, "index JSON is malformed");
+        return -1;
+    }
+    if (!cJSON_IsObject(root)) {
+        set_parse_err(err_msg, "index JSON root is not an object");
+        goto fail;
+    }
+
+    if (parse_int_field(root, "schemaVersion", &out->schema_version, true,
+                        err_msg, "index missing schemaVersion",
+                        "index schemaVersion must be a number") < 0)
+        goto fail;
+    if (out->schema_version != 2) {
+        set_parse_err(err_msg, "index schemaVersion must be 2");
+        goto fail;
+    }
+
+    /* mediaType is optional here; absence maps to OCI_MT_UNKNOWN. */
+    if (dup_optional_string(root, "mediaType", &out->raw_media_type, err_msg,
+                            "index mediaType must be a string") < 0)
+        goto fail;
+    out->media_type = out->raw_media_type
+                          ? oci_media_type_parse(out->raw_media_type)
+                          : OCI_MT_UNKNOWN;
+
+    const cJSON *manifests =
+        cJSON_GetObjectItemCaseSensitive(root, "manifests");
+    if (!manifests || !cJSON_IsArray(manifests)) {
+        set_parse_err(err_msg, "index manifests array missing");
+        goto fail;
+    }
+    int n = cJSON_GetArraySize(manifests);
+    if (n < 0)
+        n = 0;
+    if (n > 0) {
+        out->entries = calloc((size_t) n, sizeof(*out->entries));
+        if (!out->entries) {
+            set_parse_err(err_msg, "out of memory allocating index entries");
+            errno = ENOMEM;
+            goto fail;
+        }
+    }
+    for (int i = 0; i < n; i++) {
+        const cJSON *entry = cJSON_GetArrayItem(manifests, i);
+        if (!cJSON_IsObject(entry)) {
+            set_parse_err(err_msg, "index manifest entry is not an object");
+            goto fail;
+        }
+        oci_index_entry_t *slot = &out->entries[out->nentries];
+        if (parse_descriptor(entry, &slot->desc, err_msg) < 0)
+            goto fail;
+        /* The slot owns heap strings from here on, so count it before the
+         * platform parse: oci_index_free() only releases
+         * entries[0..nentries), and a platform failure would otherwise leak
+         * the descriptor that was just parsed. The slot's platform is still
+         * all-zero at that point (calloc, and parse_platform clears it on
+         * failure), which oci_platform_free() handles.
+         */
+        out->nentries++;
+        const cJSON *plat =
+            cJSON_GetObjectItemCaseSensitive(entry, "platform");
+        if (parse_platform(plat, &slot->platform, err_msg) < 0)
+            goto fail;
+    }
+
+    cJSON_Delete(root);
+    return 0;
+fail:
+    cJSON_Delete(root);
+    oci_index_free(out);
+    return -1;
+} + +int oci_image_config_parse(const char *json, + size_t len, + oci_image_config_t *out, + const char **err_msg) +{ + if (!json || !out) { + set_parse_err(err_msg, "oci_image_config_parse: NULL input"); + return -1; + } + memset(out, 0, sizeof(*out)); + + cJSON *root = cJSON_ParseWithLength(json, len); + if (!root) { + set_parse_err(err_msg, "image config JSON is malformed"); + return -1; + } + if (!cJSON_IsObject(root)) { + set_parse_err(err_msg, "image config JSON root is not an object"); + goto fail; + } + + if (require_string(root, "architecture", &out->architecture, err_msg, + "image config missing architecture", + "image config architecture must be a string") < 0) + goto fail; + if (require_string(root, "os", &out->os, err_msg, + "image config missing os", + "image config os must be a string") < 0) + goto fail; + if (dup_optional_string(root, "variant", &out->variant, err_msg, + "image config variant must be a string") < 0) + goto fail; + + const cJSON *cfg = cJSON_GetObjectItemCaseSensitive(root, "config"); + if (cfg) { + if (!cJSON_IsObject(cfg)) { + set_parse_err(err_msg, "image config.config must be an object"); + goto fail; + } + if (dup_optional_string(cfg, "User", &out->config.user, err_msg, + "image config User must be a string") < 0) + goto fail; + if (dup_optional_string(cfg, "WorkingDir", &out->config.working_dir, + err_msg, + "image config WorkingDir must be a string") < + 0) + goto fail; + if (dup_string_array(cfg, "Env", &out->config.env, err_msg, + "image config Env must be a string array", + false) < 0) + goto fail; + if (dup_string_array(cfg, "Entrypoint", &out->config.entrypoint, + err_msg, + "image config Entrypoint must be a string array", + false) < 0) + goto fail; + if (dup_string_array(cfg, "Cmd", &out->config.cmd, err_msg, + "image config Cmd must be a string array", + false) < 0) + goto fail; + } + + const cJSON *rootfs = cJSON_GetObjectItemCaseSensitive(root, "rootfs"); + if (!rootfs || !cJSON_IsObject(rootfs)) { + 
set_parse_err(err_msg, "image config rootfs object missing"); + goto fail; + } + const cJSON *type = cJSON_GetObjectItemCaseSensitive(rootfs, "type"); + if (!type || !cJSON_IsString(type) || !type->valuestring || + strcmp(type->valuestring, "layers") != 0) { + set_parse_err(err_msg, "image config rootfs.type must be \"layers\""); + goto fail; + } + if (dup_string_array(rootfs, "diff_ids", &out->rootfs_diff_ids, err_msg, + "image config rootfs.diff_ids must be a string " + "array", + true) < 0) + goto fail; + /* Validate every diff_id is a recognized digest. */ + for (char **p = out->rootfs_diff_ids; p && *p; p++) { + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse(*p, &algo, hex)) { + set_parse_err(err_msg, + "image config rootfs.diff_ids entry is malformed " + "or not lowercase"); + goto fail; + } + } + + cJSON_Delete(root); + return 0; +fail: + cJSON_Delete(root); + oci_image_config_free(out); + return -1; +} + +void oci_descriptor_free(oci_descriptor_t *d) +{ + if (!d) + return; + free(d->digest_str); + free(d->raw_media_type); + memset(d, 0, sizeof(*d)); +} + +void oci_platform_free(oci_platform_t *p) +{ + if (!p) + return; + free(p->architecture); + free(p->os); + free(p->variant); + free(p->os_version); + memset(p, 0, sizeof(*p)); +} + +static void runtime_free(oci_image_runtime_t *r) +{ + if (!r) + return; + free(r->user); + free(r->working_dir); + if (r->env) { + for (char **p = r->env; *p; p++) + free(*p); + free(r->env); + } + if (r->entrypoint) { + for (char **p = r->entrypoint; *p; p++) + free(*p); + free(r->entrypoint); + } + if (r->cmd) { + for (char **p = r->cmd; *p; p++) + free(*p); + free(r->cmd); + } + memset(r, 0, sizeof(*r)); +} + +void oci_manifest_free(oci_manifest_t *m) +{ + if (!m) + return; + free(m->raw_media_type); + oci_descriptor_free(&m->config); + for (size_t i = 0; i < m->nlayers; i++) + oci_descriptor_free(&m->layers[i]); + free(m->layers); + memset(m, 0, sizeof(*m)); +} + +void 
oci_index_free(oci_index_t *idx) +{ + if (!idx) + return; + free(idx->raw_media_type); + for (size_t i = 0; i < idx->nentries; i++) { + oci_descriptor_free(&idx->entries[i].desc); + oci_platform_free(&idx->entries[i].platform); + } + free(idx->entries); + memset(idx, 0, sizeof(*idx)); +} + +void oci_image_config_free(oci_image_config_t *c) +{ + if (!c) + return; + free(c->architecture); + free(c->os); + free(c->variant); + runtime_free(&c->config); + if (c->rootfs_diff_ids) { + for (char **p = c->rootfs_diff_ids; *p; p++) + free(*p); + free(c->rootfs_diff_ids); + } + memset(c, 0, sizeof(*c)); +} + +const oci_index_entry_t *oci_index_pick_linux_arm64(const oci_index_t *idx) +{ + if (!idx || !idx->entries) + return NULL; + + const oci_index_entry_t *fallback_empty = NULL; + const oci_index_entry_t *fallback_any = NULL; + + for (size_t i = 0; i < idx->nentries; i++) { + const oci_index_entry_t *e = &idx->entries[i]; + if (strcmp(e->platform.os, "linux") != 0) + continue; + if (strcmp(e->platform.architecture, "arm64") != 0) + continue; + /* Skip foreign or unrecognized manifest media types: the registry + * fetch path cannot consume them anyway, so they are not viable + * even when the platform matches. + */ + if (!oci_media_type_is_manifest(e->desc.media_type)) + continue; + if (strcmp(e->platform.variant, "v8") == 0) + return e; + if (e->platform.variant[0] == '\0') { + if (!fallback_empty) + fallback_empty = e; + } else if (!fallback_any) { + fallback_any = e; + } + } + return fallback_empty ? 
fallback_empty : fallback_any; +} diff --git a/src/oci/manifest.h b/src/oci/manifest.h new file mode 100644 index 0000000..66ff14d --- /dev/null +++ b/src/oci/manifest.h @@ -0,0 +1,160 @@ +/* OCI image manifest, image index, and image config parsers + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Parses the three JSON document types served by an OCI / Docker registry: + * + * - image manifest: config descriptor + ordered layer descriptors + * - image index: platform-tagged manifest descriptors (multi-arch) + * - image config: architecture/os + runtime fields + rootfs diff_ids + * + * Phase 1 keeps the model offline: parsers operate on in-memory JSON bytes + * the caller already obtained from a registry fetch or disk fixture. The + * registry client lives in a later slice; the manifest model exists now so + * the fetch path can deserialize responses, and so the blob store can + * persist the parsed graph without round-tripping through opaque JSON. + * + * Every descriptor digest is validated up-front with oci/digest.c, so a + * parsed oci_descriptor_t is guaranteed to have a lowercase + * <algo>:<hex> form and a populated (algo, hex[]) pair the blob store can + * consume directly. + * + * Unknown / extension media types do not fail the parse; they are recorded + * with raw_media_type set and media_type == OCI_MT_UNKNOWN so callers can + * decide whether to ignore or reject. The selection helper for + * linux/arm64 manifests intentionally skips any entry that already failed + * media-type recognition because the registry fetch path cannot resolve + * it anyway. + */ + +#pragma once + +#include +#include + +#include "digest.h" +#include "media-type.h" + +typedef struct { + /* Original "<algo>:<hex>" string, lowercase, never NULL after parse. */ + char *digest_str; + /* Parsed digest algorithm. */ + oci_digest_algo_t algo; + /* Parsed lowercase hex (NUL-terminated). */ + char hex[OCI_DIGEST_HEX_MAX + 1]; + /* Declared size in bytes. 
Negative values are rejected at parse. */ + int64_t size; + /* Canonical media-type enum, OCI_MT_UNKNOWN if not in the recognized + * table. + */ + oci_media_type_t media_type; + /* Original media-type string for diagnostics. NULL if absent. */ + char *raw_media_type; +} oci_descriptor_t; + +typedef struct { + /* "arm64", "amd64", "ppc64le", ... Never NULL after parse. */ + char *architecture; + /* "linux", "windows", ... Never NULL after parse. */ + char *os; + /* "v8", "v7", "" (empty string when absent in JSON). */ + char *variant; + /* "10.0.14393.1066" for Windows builds, "" otherwise. */ + char *os_version; +} oci_platform_t; + +typedef struct { + oci_descriptor_t desc; + /* Empty platform fields ("" strings, not NULL) when JSON omits them so + * predicates can compare unconditionally. + */ + oci_platform_t platform; +} oci_index_entry_t; + +typedef struct { + int schema_version; + /* Top-level mediaType field. OCI manifests carry an explicit mediaType; + * Docker manifests historically rely on the descriptor or HTTP + * Content-Type. The parser falls back to OCI_MT_UNKNOWN if the JSON + * field is missing and lets the caller cross-check against the + * registry's Content-Type. + */ + oci_media_type_t media_type; + /* Original mediaType string, NULL if absent. */ + char *raw_media_type; + oci_index_entry_t *entries; + size_t nentries; +} oci_index_t; + +typedef struct { + int schema_version; + oci_media_type_t media_type; + char *raw_media_type; + oci_descriptor_t config; + oci_descriptor_t *layers; + size_t nlayers; +} oci_manifest_t; + +/* Image config runtime block (the inner "config" object). Phase 3 of the + * OCI roadmap consumes these fields; the model exists in Phase 1 to support + * elfuse oci inspect rendering. NULL-terminated string arrays are NULL when + * the JSON omits the field; empty arrays are represented as an allocated + * one-element array containing only the NULL terminator. 
+ */ +typedef struct { + char *user; + char *working_dir; + char **env; + char **entrypoint; + char **cmd; +} oci_image_runtime_t; + +typedef struct { + char *architecture; + char *os; + char *variant; + oci_image_runtime_t config; + /* rootfs.diff_ids, NULL-terminated. Always populated (the OCI image-spec + * requires "rootfs"); a parse without this field returns -1. + */ + char **rootfs_diff_ids; +} oci_image_config_t; + +/* Parsers. Each takes raw JSON bytes (need not be NUL-terminated; pass the + * exact length). On success returns 0 and populates out. On failure returns + * -1 with errno preserved when set (ENOMEM, EINVAL) and writes a static + * diagnostic message into *err_msg (when err_msg != NULL). + */ +int oci_manifest_parse(const char *json, + size_t len, + oci_manifest_t *out, + const char **err_msg); + +int oci_index_parse(const char *json, + size_t len, + oci_index_t *out, + const char **err_msg); + +int oci_image_config_parse(const char *json, + size_t len, + oci_image_config_t *out, + const char **err_msg); + +/* Release any heap fields. Safe on zero-initialised structs and on NULL. */ +void oci_manifest_free(oci_manifest_t *m); +void oci_index_free(oci_index_t *idx); +void oci_image_config_free(oci_image_config_t *c); +void oci_descriptor_free(oci_descriptor_t *d); +void oci_platform_free(oci_platform_t *p); + +/* Select the linux/arm64 manifest from an index. Returns a pointer into + * idx->entries on success (caller does not free) or NULL when no acceptable + * platform is present. Preference order, highest first: + * 1. os=="linux" && arch=="arm64" && variant=="v8" + * 2. os=="linux" && arch=="arm64" && variant=="" + * 3. os=="linux" && arch=="arm64" (any other variant; first wins) + * Foreign / unsupported media types are skipped: even if a foreign-layer + * manifest claims linux/arm64, the registry fetch path cannot consume it. 
+ */ +const oci_index_entry_t *oci_index_pick_linux_arm64(const oci_index_t *idx); diff --git a/src/oci/media-type.c b/src/oci/media-type.c new file mode 100644 index 0000000..920d40a --- /dev/null +++ b/src/oci/media-type.c @@ -0,0 +1,189 @@ +/* OCI / Docker media-type canonicalization + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "media-type.h" + +#include +#include +#include + +struct mt_entry { + const char *name; + oci_media_type_t kind; +}; + +/* All recognized OCI and Docker media types in a single table. Order has no + * semantic meaning; the lookup is linear because the table is small (~16 + * entries) and runs at most once per descriptor parse. + */ +static const struct mt_entry MEDIA_TYPES[] = { + /* Manifest documents. */ + {"application/vnd.oci.image.manifest.v1+json", OCI_MT_MANIFEST_OCI}, + {"application/vnd.docker.distribution.manifest.v2+json", + OCI_MT_MANIFEST_DOCKER}, + + /* Image indexes / manifest lists. */ + {"application/vnd.oci.image.index.v1+json", OCI_MT_INDEX_OCI}, + {"application/vnd.docker.distribution.manifest.list.v2+json", + OCI_MT_INDEX_DOCKER}, + + /* Image config. */ + {"application/vnd.oci.image.config.v1+json", OCI_MT_CONFIG_OCI}, + {"application/vnd.docker.container.image.v1+json", OCI_MT_CONFIG_DOCKER}, + + /* Supported layer payloads. */ + {"application/vnd.oci.image.layer.v1.tar", OCI_MT_LAYER_OCI_TAR}, + {"application/vnd.oci.image.layer.v1.tar+gzip", OCI_MT_LAYER_OCI_TAR_GZIP}, + {"application/vnd.oci.image.layer.v1.tar+zstd", OCI_MT_LAYER_OCI_TAR_ZSTD}, + {"application/vnd.docker.image.rootfs.diff.tar.gzip", + OCI_MT_LAYER_DOCKER_TAR_GZIP}, + {"application/vnd.docker.image.rootfs.diff.tar.zstd", + OCI_MT_LAYER_DOCKER_TAR_ZSTD}, + + /* Foreign (nondistributable) layers. Recognized so the parser can produce + * a precise rejection message instead of falling through to UNKNOWN. 
+ */ + {"application/vnd.oci.image.layer.nondistributable.v1.tar", + OCI_MT_LAYER_FOREIGN_OCI}, + {"application/vnd.oci.image.layer.nondistributable.v1.tar+gzip", + OCI_MT_LAYER_FOREIGN_OCI_GZIP}, + {"application/vnd.docker.image.rootfs.foreign.diff.tar", + OCI_MT_LAYER_FOREIGN_DOCKER}, + {"application/vnd.docker.image.rootfs.foreign.diff.tar.gzip", + OCI_MT_LAYER_FOREIGN_DOCKER_GZIP}, +}; + +#define MEDIA_TYPE_COUNT (sizeof(MEDIA_TYPES) / sizeof(MEDIA_TYPES[0])) + +/* Strip surrounding whitespace and any parameters after ';'. Writes the + * canonical span into out. Returns the canonical length or 0 if the input + * collapses to empty. + */ +static size_t canonicalize(const char *s, char *out, size_t out_size) +{ + if (!s || out_size == 0) + return 0; + + while (*s == ' ' || *s == '\t') + s++; + + const char *end = s; + while (*end && *end != ';') + end++; + while (end > s && (end[-1] == ' ' || end[-1] == '\t')) + end--; + + size_t len = (size_t) (end - s); + if (len == 0 || len >= out_size) + return 0; + memcpy(out, s, len); + out[len] = '\0'; + return len; +} + +oci_media_type_t oci_media_type_parse(const char *s) +{ + if (!s) + return OCI_MT_UNKNOWN; + + /* Media-type values in OCI manifests are short; 192 bytes covers every + * canonical name in the table with room for adversarial whitespace. 
+ */ + char buf[192]; + if (canonicalize(s, buf, sizeof(buf)) == 0) + return OCI_MT_UNKNOWN; + + for (size_t i = 0; i < MEDIA_TYPE_COUNT; i++) { + if (!strcmp(MEDIA_TYPES[i].name, buf)) + return MEDIA_TYPES[i].kind; + } + return OCI_MT_UNKNOWN; +} + +const char *oci_media_type_name(oci_media_type_t mt) +{ + for (size_t i = 0; i < MEDIA_TYPE_COUNT; i++) { + if (MEDIA_TYPES[i].kind == mt) + return MEDIA_TYPES[i].name; + } + return NULL; +} + +bool oci_media_type_is_manifest(oci_media_type_t mt) +{ + return mt == OCI_MT_MANIFEST_OCI || mt == OCI_MT_MANIFEST_DOCKER; +} + +bool oci_media_type_is_index(oci_media_type_t mt) +{ + return mt == OCI_MT_INDEX_OCI || mt == OCI_MT_INDEX_DOCKER; +} + +bool oci_media_type_is_config(oci_media_type_t mt) +{ + return mt == OCI_MT_CONFIG_OCI || mt == OCI_MT_CONFIG_DOCKER; +} + +bool oci_media_type_is_layer(oci_media_type_t mt) +{ + switch (mt) { + case OCI_MT_LAYER_OCI_TAR: + case OCI_MT_LAYER_OCI_TAR_GZIP: + case OCI_MT_LAYER_OCI_TAR_ZSTD: + case OCI_MT_LAYER_DOCKER_TAR_GZIP: + case OCI_MT_LAYER_DOCKER_TAR_ZSTD: + case OCI_MT_LAYER_FOREIGN_OCI: + case OCI_MT_LAYER_FOREIGN_OCI_GZIP: + case OCI_MT_LAYER_FOREIGN_DOCKER: + case OCI_MT_LAYER_FOREIGN_DOCKER_GZIP: + return true; + default: + return false; + } +} + +bool oci_media_type_is_layer_supported(oci_media_type_t mt) +{ + switch (mt) { + case OCI_MT_LAYER_OCI_TAR: + case OCI_MT_LAYER_OCI_TAR_GZIP: + case OCI_MT_LAYER_OCI_TAR_ZSTD: + case OCI_MT_LAYER_DOCKER_TAR_GZIP: + case OCI_MT_LAYER_DOCKER_TAR_ZSTD: + return true; + default: + return false; + } +} + +bool oci_media_type_is_foreign(oci_media_type_t mt) +{ + switch (mt) { + case OCI_MT_LAYER_FOREIGN_OCI: + case OCI_MT_LAYER_FOREIGN_OCI_GZIP: + case OCI_MT_LAYER_FOREIGN_DOCKER: + case OCI_MT_LAYER_FOREIGN_DOCKER_GZIP: + return true; + default: + return false; + } +} + +oci_compression_t oci_media_type_compression(oci_media_type_t mt) +{ + switch (mt) { + case OCI_MT_LAYER_OCI_TAR_GZIP: + case OCI_MT_LAYER_DOCKER_TAR_GZIP: + case 
OCI_MT_LAYER_FOREIGN_OCI_GZIP: + case OCI_MT_LAYER_FOREIGN_DOCKER_GZIP: + return OCI_COMPRESSION_GZIP; + case OCI_MT_LAYER_OCI_TAR_ZSTD: + case OCI_MT_LAYER_DOCKER_TAR_ZSTD: + return OCI_COMPRESSION_ZSTD; + default: + return OCI_COMPRESSION_NONE; + } +} diff --git a/src/oci/media-type.h b/src/oci/media-type.h new file mode 100644 index 0000000..66a2a1b --- /dev/null +++ b/src/oci/media-type.h @@ -0,0 +1,93 @@ +/* OCI / Docker media-type canonicalization + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * OCI image references carry media-type strings on every descriptor. The + * registry client, manifest parser, and unpack stage all branch on the media + * type, so a single canonical enum lookup keeps the comparisons one place + * away from string typos. Docker registries continue to serve the legacy + * docker-namespaced media types (vnd.docker.distribution.manifest.v2+json) + * even when the image-spec wire format is OCI v1; the table accepts both. + * + * Foreign (nondistributable) layers are recognized but classified as + * unsupported per oci-roadmap.md Q3: elfuse cannot fetch the out-of-band + * payload those layers reference, so rejecting them at parse time is the + * honest answer rather than carrying a half-supported code path. + */ + +#pragma once + +#include + +typedef enum { + OCI_MT_UNKNOWN = 0, + + /* Manifest documents (single platform). */ + OCI_MT_MANIFEST_OCI, + OCI_MT_MANIFEST_DOCKER, + + /* Image index / manifest list (multi-platform). */ + OCI_MT_INDEX_OCI, + OCI_MT_INDEX_DOCKER, + + /* Image config blob. */ + OCI_MT_CONFIG_OCI, + OCI_MT_CONFIG_DOCKER, + + /* Layer blobs that elfuse can actually consume. */ + OCI_MT_LAYER_OCI_TAR, + OCI_MT_LAYER_OCI_TAR_GZIP, + OCI_MT_LAYER_OCI_TAR_ZSTD, + OCI_MT_LAYER_DOCKER_TAR_GZIP, + OCI_MT_LAYER_DOCKER_TAR_ZSTD, + + /* Foreign layers: distinguishable but explicitly unsupported. 
*/ + OCI_MT_LAYER_FOREIGN_OCI, + OCI_MT_LAYER_FOREIGN_OCI_GZIP, + OCI_MT_LAYER_FOREIGN_DOCKER, + OCI_MT_LAYER_FOREIGN_DOCKER_GZIP, +} oci_media_type_t; + +typedef enum { + OCI_COMPRESSION_NONE, + OCI_COMPRESSION_GZIP, + OCI_COMPRESSION_ZSTD, +} oci_compression_t; + +/* Classify a media-type string. Trailing parameters after ';' (e.g. charset) + * are stripped before matching; surrounding whitespace is ignored. Returns + * OCI_MT_UNKNOWN for any string not in the recognized table. NULL is treated + * as OCI_MT_UNKNOWN. + */ +oci_media_type_t oci_media_type_parse(const char *s); + +/* Lookup the canonical name string for a media-type enum. Returns NULL for + * OCI_MT_UNKNOWN or an out-of-range enum value. The returned pointer is to + * static storage. + */ +const char *oci_media_type_name(oci_media_type_t mt); + +/* Predicates by document category. Each returns false for OCI_MT_UNKNOWN. */ +bool oci_media_type_is_manifest(oci_media_type_t mt); +bool oci_media_type_is_index(oci_media_type_t mt); +bool oci_media_type_is_config(oci_media_type_t mt); +bool oci_media_type_is_layer(oci_media_type_t mt); + +/* True when the layer media type is one elfuse can actually decode. Foreign + * layers and OCI_MT_UNKNOWN return false; the manifest parser rejects layer + * descriptors that fail this check. + */ +bool oci_media_type_is_layer_supported(oci_media_type_t mt); + +/* True for the four foreign-layer media types. The manifest parser keeps + * these distinguishable so the error message can name the actual layer type + * instead of a generic 'unsupported'. + */ +bool oci_media_type_is_foreign(oci_media_type_t mt); + +/* Compression algorithm carried by a layer media type. Non-layer or unknown + * inputs return OCI_COMPRESSION_NONE; callers should gate on + * oci_media_type_is_layer first. 
+ */ +oci_compression_t oci_media_type_compression(oci_media_type_t mt); diff --git a/tests/test-oci-manifest.c b/tests/test-oci-manifest.c new file mode 100644 index 0000000..4508117 --- /dev/null +++ b/tests/test-oci-manifest.c @@ -0,0 +1,748 @@ +/* OCI manifest / image-index / image-config parser unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Native macOS test (no HVF, no codesign). Exercises src/oci/manifest.c and + * src/oci/media-type.c with inline JSON fixtures so the suite stays under + * one file and the assertions are auditable from the source. + * + * Build: see mk/tests.mk target test-oci-manifest. + * Run: build/test-oci-manifest + */ + +#include +#include +#include + +#include "oci/manifest.h" +#include "oci/media-type.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int total = 0; +static int passed = 0; + +static void report_pass(const char *name) +{ + total++; + passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *detail) +{ + total++; + printf(" " RED "FAIL" RESET " %s: %s\n", name, detail ? 
detail : ""); +} + +#define CHECK(cond, name, detail) \ + do { \ + if (cond) \ + report_pass(name); \ + else \ + report_fail(name, (detail)); \ + } while (0) + +/* ── media-type module ─────────────────────────────────────────── */ + +static void test_media_type_recognized(void) +{ + struct { + const char *in; + oci_media_type_t want; + } cases[] = { + {"application/vnd.oci.image.manifest.v1+json", OCI_MT_MANIFEST_OCI}, + {"application/vnd.docker.distribution.manifest.v2+json", + OCI_MT_MANIFEST_DOCKER}, + {"application/vnd.oci.image.index.v1+json", OCI_MT_INDEX_OCI}, + {"application/vnd.docker.distribution.manifest.list.v2+json", + OCI_MT_INDEX_DOCKER}, + {"application/vnd.oci.image.config.v1+json", OCI_MT_CONFIG_OCI}, + {"application/vnd.docker.container.image.v1+json", + OCI_MT_CONFIG_DOCKER}, + {"application/vnd.oci.image.layer.v1.tar", OCI_MT_LAYER_OCI_TAR}, + {"application/vnd.oci.image.layer.v1.tar+gzip", + OCI_MT_LAYER_OCI_TAR_GZIP}, + {"application/vnd.oci.image.layer.v1.tar+zstd", + OCI_MT_LAYER_OCI_TAR_ZSTD}, + {"application/vnd.docker.image.rootfs.diff.tar.gzip", + OCI_MT_LAYER_DOCKER_TAR_GZIP}, + {"application/vnd.oci.image.layer.nondistributable.v1.tar+gzip", + OCI_MT_LAYER_FOREIGN_OCI_GZIP}, + }; + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + oci_media_type_t got = oci_media_type_parse(cases[i].in); + char name[256]; + snprintf(name, sizeof(name), "media_type parse: %s", cases[i].in); + CHECK(got == cases[i].want, name, "wrong enum value"); + } +} + +static void test_media_type_strip_params(void) +{ + /* charset / boundary parameters and whitespace must not defeat the + * lookup; the registry sometimes annotates Content-Type with charset. 
+ */ + oci_media_type_t got = oci_media_type_parse( + " application/vnd.oci.image.manifest.v1+json ; charset=utf-8 "); + CHECK(got == OCI_MT_MANIFEST_OCI, "media_type strips params + whitespace", + "did not canonicalize"); +} + +static void test_media_type_unknown(void) +{ + CHECK(oci_media_type_parse(NULL) == OCI_MT_UNKNOWN, + "media_type NULL -> UNKNOWN", "expected UNKNOWN"); + CHECK(oci_media_type_parse("") == OCI_MT_UNKNOWN, + "media_type empty -> UNKNOWN", "expected UNKNOWN"); + CHECK(oci_media_type_parse("text/plain") == OCI_MT_UNKNOWN, + "media_type bogus -> UNKNOWN", "expected UNKNOWN"); +} + +static void test_media_type_predicates(void) +{ + CHECK(oci_media_type_is_manifest(OCI_MT_MANIFEST_OCI), + "predicate manifest OCI", NULL); + CHECK(oci_media_type_is_manifest(OCI_MT_MANIFEST_DOCKER), + "predicate manifest Docker", NULL); + CHECK(!oci_media_type_is_manifest(OCI_MT_INDEX_OCI), + "predicate manifest rejects index", NULL); + + CHECK(oci_media_type_is_index(OCI_MT_INDEX_OCI), "predicate index OCI", + NULL); + CHECK(oci_media_type_is_index(OCI_MT_INDEX_DOCKER), + "predicate index Docker", NULL); + + CHECK(oci_media_type_is_config(OCI_MT_CONFIG_OCI), "predicate config OCI", + NULL); + CHECK(oci_media_type_is_layer(OCI_MT_LAYER_OCI_TAR_GZIP), + "predicate layer", NULL); + CHECK(oci_media_type_is_layer(OCI_MT_LAYER_FOREIGN_OCI_GZIP), + "predicate layer includes foreign", NULL); + CHECK(!oci_media_type_is_layer_supported(OCI_MT_LAYER_FOREIGN_OCI_GZIP), + "predicate layer_supported excludes foreign", NULL); + CHECK(oci_media_type_is_layer_supported(OCI_MT_LAYER_OCI_TAR_GZIP), + "predicate layer_supported true for gzip", NULL); + CHECK(oci_media_type_is_layer_supported(OCI_MT_LAYER_OCI_TAR_ZSTD), + "predicate layer_supported true for zstd", NULL); + CHECK(oci_media_type_is_foreign(OCI_MT_LAYER_FOREIGN_DOCKER_GZIP), + "predicate foreign true for docker foreign gzip", NULL); +} + +static void test_media_type_compression(void) +{ + 
CHECK(oci_media_type_compression(OCI_MT_LAYER_OCI_TAR_GZIP) == + OCI_COMPRESSION_GZIP, + "compression gzip", NULL); + CHECK(oci_media_type_compression(OCI_MT_LAYER_OCI_TAR_ZSTD) == + OCI_COMPRESSION_ZSTD, + "compression zstd", NULL); + CHECK(oci_media_type_compression(OCI_MT_LAYER_OCI_TAR) == + OCI_COMPRESSION_NONE, + "compression none for uncompressed tar", NULL); +} + +/* ── manifest parser ────────────────────────────────────────────── */ + +static const char OCI_MANIFEST_GOOD[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "1f1fa1e4d3a92b2c5e1b7a90d6c7a8e9f0a1b2c3d4e5f60718293a4b5c6d7e8f\"," + " \"size\": 1234" + " }," + " \"layers\": [" + " {" + " \"mediaType\": \"application/vnd.oci.image.layer.v1.tar+gzip\"," + " \"digest\": " + "\"sha256:" + "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd\"," + " \"size\": 56789" + " }," + " {" + " \"mediaType\": \"application/vnd.oci.image.layer.v1.tar+zstd\"," + " \"digest\": " + "\"sha256:" + "fedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedc\"," + " \"size\": 1024" + " }" + " ]" + "}"; + +static const char DOCKER_MANIFEST_GOOD[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": " + "\"application/vnd.docker.distribution.manifest.v2+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.docker.container.image.v1+json\"," + " \"digest\": " + "\"sha256:" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef\"," + " \"size\": 4096" + " }," + " \"layers\": [" + " {" + " \"mediaType\": " + "\"application/vnd.docker.image.rootfs.diff.tar.gzip\"," + " \"digest\": " + "\"sha256:" + "0123456789012345678901234567890123456789012345678901234567890123\"," + " \"size\": 99" + " }" + " ]" + "}"; + +static void test_manifest_oci_happy(void) +{ + oci_manifest_t m; + const char *err = NULL; + int rc = 
oci_manifest_parse(OCI_MANIFEST_GOOD, + sizeof(OCI_MANIFEST_GOOD) - 1, &m, &err); + if (rc != 0) { + report_fail("manifest OCI happy", err ? err : "parse failed"); + return; + } + CHECK(m.schema_version == 2, "manifest OCI schemaVersion", NULL); + CHECK(m.media_type == OCI_MT_MANIFEST_OCI, "manifest OCI mediaType", NULL); + CHECK(m.config.media_type == OCI_MT_CONFIG_OCI, + "manifest OCI config mediaType", NULL); + CHECK(m.config.algo == OCI_DIGEST_SHA256, "manifest OCI config algo", + NULL); + CHECK(m.config.size == 1234, "manifest OCI config size", NULL); + CHECK(m.nlayers == 2, "manifest OCI two layers", NULL); + CHECK(m.layers[0].media_type == OCI_MT_LAYER_OCI_TAR_GZIP, + "manifest OCI layer[0] gzip", NULL); + CHECK(m.layers[1].media_type == OCI_MT_LAYER_OCI_TAR_ZSTD, + "manifest OCI layer[1] zstd", NULL); + CHECK(m.layers[0].size == 56789, "manifest OCI layer[0] size", NULL); + oci_manifest_free(&m); +} + +static void test_manifest_docker_happy(void) +{ + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(DOCKER_MANIFEST_GOOD, + sizeof(DOCKER_MANIFEST_GOOD) - 1, &m, &err); + if (rc != 0) { + report_fail("manifest Docker happy", err ? err : "parse failed"); + return; + } + CHECK(m.media_type == OCI_MT_MANIFEST_DOCKER, + "manifest Docker mediaType", NULL); + CHECK(m.config.media_type == OCI_MT_CONFIG_DOCKER, + "manifest Docker config mediaType", NULL); + CHECK(m.nlayers == 1, "manifest Docker one layer", NULL); + CHECK(m.layers[0].media_type == OCI_MT_LAYER_DOCKER_TAR_GZIP, + "manifest Docker layer[0] gzip", NULL); + oci_manifest_free(&m); +} + +static void test_manifest_malformed_json(void) +{ + oci_manifest_t m; + const char *err = NULL; + const char bogus[] = "{ this is not json"; + int rc = oci_manifest_parse(bogus, sizeof(bogus) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest malformed JSON rejected", + err ? 
err : "expected -1 with err"); +} + +static void test_manifest_wrong_schema(void) +{ + const char j[] = + "{ \"schemaVersion\": 1," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef\"," + " \"size\": 1 }," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest schemaVersion != 2 rejected", + err); +} + +static void test_manifest_missing_config(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest missing config rejected", err); +} + +static void test_manifest_bad_digest(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": \"sha256:DEADBEEF\"," + " \"size\": 1 }," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, + "manifest uppercase / short digest rejected", err); +} + +static void test_manifest_negative_size(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "abababababababababababababababababababababababababababababababab\"," + " \"size\": -1 }," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest 
negative size rejected", err); +} + +static void test_manifest_fractional_size(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "abababababababababababababababababababababababababababababababab\"," + " \"size\": 1.5 }," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest fractional size rejected", err); +} + +static void test_manifest_foreign_layer_rejected(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "1f1fa1e4d3a92b2c5e1b7a90d6c7a8e9f0a1b2c3d4e5f60718293a4b5c6d7e8f\"," + " \"size\": 1 }," + " \"layers\": [ {" + " \"mediaType\": " + "\"application/vnd.oci.image.layer.nondistributable.v1.tar+gzip\"," + " \"digest\": " + "\"sha256:" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef\"," + " \"size\": 1 } ] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, "manifest foreign layer rejected", err); +} + +static void test_manifest_wrong_config_mediatype(void) +{ + const char j[] = + "{ \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"config\": {" + " \"mediaType\": \"application/vnd.oci.image.layer.v1.tar+gzip\"," + " \"digest\": " + "\"sha256:" + "1f1fa1e4d3a92b2c5e1b7a90d6c7a8e9f0a1b2c3d4e5f60718293a4b5c6d7e8f\"," + " \"size\": 1 }," + " \"layers\": [] }"; + oci_manifest_t m; + const char *err = NULL; + int rc = oci_manifest_parse(j, sizeof(j) - 1, &m, &err); + CHECK(rc == -1 && err != NULL, + "manifest config 
descriptor with non-config mediaType rejected", + err); +} + +/* ── index parser + platform selection ──────────────────────────── */ + +static const char OCI_INDEX_MULTIARCH[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.index.v1+json\"," + " \"manifests\": [" + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "1111111111111111111111111111111111111111111111111111111111111111\"," + " \"size\": 100," + " \"platform\": { \"architecture\": \"amd64\", \"os\": \"linux\" }" + " }," + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "2222222222222222222222222222222222222222222222222222222222222222\"," + " \"size\": 200," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"linux\"," + " \"variant\": \"v8\" }" + " }," + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "3333333333333333333333333333333333333333333333333333333333333333\"," + " \"size\": 300," + " \"platform\": { \"architecture\": \"ppc64le\", \"os\": \"linux\" }" + " }" + " ]" + "}"; + +static void test_index_oci_pick_v8(void) +{ + oci_index_t idx; + const char *err = NULL; + int rc = oci_index_parse(OCI_INDEX_MULTIARCH, + sizeof(OCI_INDEX_MULTIARCH) - 1, &idx, &err); + if (rc != 0) { + report_fail("index OCI parse", err ? err : "parse failed"); + return; + } + CHECK(idx.nentries == 3, "index has three entries", NULL); + const oci_index_entry_t *pick = oci_index_pick_linux_arm64(&idx); + CHECK(pick != NULL, "index picks linux/arm64", NULL); + if (pick) { + CHECK(strcmp(pick->platform.architecture, "arm64") == 0, + "picked arch arm64", NULL); + CHECK(strcmp(pick->platform.variant, "v8") == 0, + "picked variant v8 wins over no-variant", NULL); + } + oci_index_free(&idx); +} + +/* When v8 is absent, the entry without an explicit variant is preferred. 
*/ +static const char OCI_INDEX_NO_V8[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.index.v1+json\"," + " \"manifests\": [" + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"," + " \"size\": 100," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"linux\"," + " \"variant\": \"v7\" }" + " }," + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\"," + " \"size\": 200," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"linux\" }" + " }" + " ]" + "}"; + +static void test_index_oci_pick_empty_variant(void) +{ + oci_index_t idx; + const char *err = NULL; + int rc = + oci_index_parse(OCI_INDEX_NO_V8, sizeof(OCI_INDEX_NO_V8) - 1, &idx, + &err); + if (rc != 0) { + report_fail("index parse no-v8", err ? 
err : "parse failed"); + return; + } + const oci_index_entry_t *pick = oci_index_pick_linux_arm64(&idx); + CHECK(pick != NULL, "index picks linux/arm64 without v8", NULL); + if (pick) + CHECK(pick->platform.variant[0] == '\0', + "no-variant entry wins over v7 when v8 absent", NULL); + oci_index_free(&idx); +} + +static const char OCI_INDEX_NO_LINUX_ARM64[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.index.v1+json\"," + " \"manifests\": [" + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\"," + " \"size\": 100," + " \"platform\": { \"architecture\": \"amd64\", \"os\": \"linux\" }" + " }," + " {" + " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\"," + " \"digest\": " + "\"sha256:" + "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\"," + " \"size\": 200," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"darwin\" }" + " }" + " ]" + "}"; + +static void test_index_no_match_returns_null(void) +{ + oci_index_t idx; + const char *err = NULL; + int rc = oci_index_parse(OCI_INDEX_NO_LINUX_ARM64, + sizeof(OCI_INDEX_NO_LINUX_ARM64) - 1, &idx, + &err); + if (rc != 0) { + report_fail("index parse no-linux-arm64", err ? 
err : "parse failed"); + return; + } + CHECK(oci_index_pick_linux_arm64(&idx) == NULL, + "index returns NULL when no linux/arm64 entry exists", NULL); + oci_index_free(&idx); +} + +static const char DOCKER_INDEX_MULTIARCH[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": " + "\"application/vnd.docker.distribution.manifest.list.v2+json\"," + " \"manifests\": [" + " {" + " \"mediaType\": " + "\"application/vnd.docker.distribution.manifest.v2+json\"," + " \"digest\": " + "\"sha256:" + "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\"," + " \"size\": 200," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"linux\"," + " \"variant\": \"v8\" }" + " }" + " ]" + "}"; + +static void test_index_docker_happy(void) +{ + oci_index_t idx; + const char *err = NULL; + int rc = oci_index_parse(DOCKER_INDEX_MULTIARCH, + sizeof(DOCKER_INDEX_MULTIARCH) - 1, &idx, &err); + if (rc != 0) { + report_fail("index Docker parse", err ? err : "parse failed"); + return; + } + CHECK(idx.media_type == OCI_MT_INDEX_DOCKER, "index Docker mediaType", + NULL); + CHECK(idx.nentries == 1, "index Docker entry count", NULL); + const oci_index_entry_t *pick = oci_index_pick_linux_arm64(&idx); + CHECK(pick != NULL, "Docker index picks linux/arm64/v8", NULL); + oci_index_free(&idx); +} + +/* If the index's arm64 entry has an unknown manifest media type, the picker + * skips it: the registry fetch path cannot consume the resulting manifest. 
+ */ +static const char OCI_INDEX_BAD_ARM64_MEDIATYPE[] = + "{" + " \"schemaVersion\": 2," + " \"mediaType\": \"application/vnd.oci.image.index.v1+json\"," + " \"manifests\": [" + " {" + " \"mediaType\": \"application/vnd.cncf.helm.config.v1+json\"," + " \"digest\": " + "\"sha256:" + "1212121212121212121212121212121212121212121212121212121212121212\"," + " \"size\": 50," + " \"platform\": { \"architecture\": \"arm64\", \"os\": \"linux\" }" + " }" + " ]" + "}"; + +static void test_index_skips_unknown_mediatype(void) +{ + oci_index_t idx; + const char *err = NULL; + int rc = oci_index_parse(OCI_INDEX_BAD_ARM64_MEDIATYPE, + sizeof(OCI_INDEX_BAD_ARM64_MEDIATYPE) - 1, &idx, + &err); + if (rc != 0) { + report_fail("index unknown-mt parse", err ? err : "parse failed"); + return; + } + /* Parse must succeed (unknown media type is recorded, not rejected). + * Picker skips the entry because it cannot be consumed. + */ + CHECK(idx.nentries == 1, + "index keeps unknown-mediaType entries during parse", NULL); + CHECK(oci_index_pick_linux_arm64(&idx) == NULL, + "picker skips unknown-mediaType arm64 entry", NULL); + oci_index_free(&idx); +} + +/* ── image config parser ────────────────────────────────────────── */ + +static const char OCI_IMAGE_CONFIG_GOOD[] = + "{" + " \"created\": \"2026-01-02T03:04:05Z\"," + " \"architecture\": \"arm64\"," + " \"os\": \"linux\"," + " \"variant\": \"v8\"," + " \"config\": {" + " \"User\": \"1000:1000\"," + " \"Env\": [\"PATH=/usr/bin\", \"FOO=bar\"]," + " \"Entrypoint\": [\"/bin/sh\"]," + " \"Cmd\": [\"-c\", \"echo ok\"]," + " \"WorkingDir\": \"/home/alice\"" + " }," + " \"rootfs\": {" + " \"type\": \"layers\"," + " \"diff_ids\": [" + " " + "\"sha256:" + "4444444444444444444444444444444444444444444444444444444444444444\"," + " " + "\"sha256:" + "5555555555555555555555555555555555555555555555555555555555555555\"" + " ]" + " }" + "}"; + +static void test_image_config_happy(void) +{ + oci_image_config_t c; + const char *err = NULL; + int rc = 
oci_image_config_parse(OCI_IMAGE_CONFIG_GOOD, + sizeof(OCI_IMAGE_CONFIG_GOOD) - 1, &c, + &err); + if (rc != 0) { + report_fail("image config happy", err ? err : "parse failed"); + return; + } + CHECK(strcmp(c.architecture, "arm64") == 0, "image config architecture", + NULL); + CHECK(strcmp(c.os, "linux") == 0, "image config os", NULL); + CHECK(c.variant && strcmp(c.variant, "v8") == 0, "image config variant", + NULL); + CHECK(c.config.user && strcmp(c.config.user, "1000:1000") == 0, + "image config User", NULL); + CHECK(c.config.working_dir && + strcmp(c.config.working_dir, "/home/alice") == 0, + "image config WorkingDir", NULL); + CHECK(c.config.env && c.config.env[0] && + strcmp(c.config.env[0], "PATH=/usr/bin") == 0, + "image config Env[0]", NULL); + CHECK(c.config.env && c.config.env[1] && + strcmp(c.config.env[1], "FOO=bar") == 0 && !c.config.env[2], + "image config Env terminator", NULL); + CHECK(c.config.entrypoint && c.config.entrypoint[0] && + strcmp(c.config.entrypoint[0], "/bin/sh") == 0 && + !c.config.entrypoint[1], + "image config Entrypoint", NULL); + CHECK(c.config.cmd && c.config.cmd[0] && c.config.cmd[1] && + strcmp(c.config.cmd[0], "-c") == 0 && + strcmp(c.config.cmd[1], "echo ok") == 0 && !c.config.cmd[2], + "image config Cmd", NULL); + CHECK(c.rootfs_diff_ids && c.rootfs_diff_ids[0] && + c.rootfs_diff_ids[1] && !c.rootfs_diff_ids[2], + "image config two diff_ids", NULL); + oci_image_config_free(&c); +} + +static void test_image_config_missing_rootfs(void) +{ + const char j[] = + "{ \"architecture\": \"arm64\", \"os\": \"linux\" }"; + oci_image_config_t c; + const char *err = NULL; + int rc = oci_image_config_parse(j, sizeof(j) - 1, &c, &err); + CHECK(rc == -1 && err != NULL, "image config missing rootfs rejected", + err); +} + +static void test_image_config_bad_rootfs_type(void) +{ + const char j[] = + "{ \"architecture\": \"arm64\", \"os\": \"linux\"," + " \"rootfs\": { \"type\": \"snapshot\"," + " \"diff_ids\": [" + " \"sha256:" + 
"4444444444444444444444444444444444444444444444444444444444444444\"" + " ] } }"; + oci_image_config_t c; + const char *err = NULL; + int rc = oci_image_config_parse(j, sizeof(j) - 1, &c, &err); + CHECK(rc == -1 && err != NULL, + "image config non-layers rootfs.type rejected", err); +} + +static void test_image_config_bad_diff_id(void) +{ + /* rootfs.diff_ids must be lowercase :. */ + const char j[] = + "{ \"architecture\": \"arm64\", \"os\": \"linux\"," + " \"rootfs\": { \"type\": \"layers\"," + " \"diff_ids\": [\"sha256:NOTLOWER\"] } }"; + oci_image_config_t c; + const char *err = NULL; + int rc = oci_image_config_parse(j, sizeof(j) - 1, &c, &err); + CHECK(rc == -1 && err != NULL, "image config bad diff_id rejected", err); +} + +/* ── main ──────────────────────────────────────────────────────── */ + +int main(void) +{ + test_media_type_recognized(); + test_media_type_strip_params(); + test_media_type_unknown(); + test_media_type_predicates(); + test_media_type_compression(); + + test_manifest_oci_happy(); + test_manifest_docker_happy(); + test_manifest_malformed_json(); + test_manifest_wrong_schema(); + test_manifest_missing_config(); + test_manifest_bad_digest(); + test_manifest_negative_size(); + test_manifest_fractional_size(); + test_manifest_foreign_layer_rejected(); + test_manifest_wrong_config_mediatype(); + + test_index_oci_pick_v8(); + test_index_oci_pick_empty_variant(); + test_index_no_match_returns_null(); + test_index_docker_happy(); + test_index_skips_unknown_mediatype(); + + test_image_config_happy(); + test_image_config_missing_rootfs(); + test_image_config_bad_rootfs_type(); + test_image_config_bad_diff_id(); + + printf("\n%d/%d passed\n", passed, total); + return passed == total ? 
0 : 1; +} From cc97d971b3cd133b27ba2c113942b9c4e2b4b10e Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 15:35:00 +0800 Subject: [PATCH 4/7] Add OCI registry HTTPS client (anonymous + bearer token challenge) Fourth slice of Phase 1 from issue #31, split into 4a here. Lands the HTTP fetch substrate that connects the slice-3 manifest parsers to a real registry and streams blob bodies into the slice-2 content-addressed store, all behind a single fetcher handle. No CLI wiring yet (elfuse oci pull still returns rc=2); slice 5 connects the pull command to this layer, persists the manifest graph, and pins the resolved tag-to-digest. Slice 4 was cut into 4a / 4b per oci-roadmap.md Q7 so each slice stays under the ~800 LOC review budget. 4a covers the anonymous Docker Hub / GHCR public-pull subset: anonymous GET, 401 + Www-Authenticate Bearer challenge, token fetch, retry, blob streaming with declared-size cap and on-commit digest verification. 4b will add basic auth, --insecure-ca custom CA, and --insecure loopback-gated TLS verify off. src/oci/fetch.{c,h} wraps libcurl. A fetcher owns one CURL easy handle, one cached bearer token, and the most recent Www-Authenticate challenge. The first request is anonymous. If the registry replies 401, the header parser captures realm / service / scope, fetch_token GETs the realm with those parameters, the JSON response is parsed with cJSON, and the original request is retried once with Authorization: Bearer <token>. The cached token is reused for subsequent calls on the same fetcher so a manifest plus N layer pulls cost one token round trip rather than N+1. docker.io is rewritten to registry-1.docker.io because the reference parser stores the canonical name while the actual API host differs. The blob path is content-addressed end to end.
oci_fetch_blob short-circuits when the descriptor is already present in the store; otherwise it opens an oci_blob_writer keyed by the descriptor digest, streams response body chunks through the writer, and tracks a running byte count capped at the descriptor's declared size so a hostile server cannot stream forever. The writer's own digest check at commit time rejects any payload that hashes to anything other than the descriptor hex. Size mismatch, digest mismatch, transport error, and non-2xx status all unwind via oci_blob_writer_abort so an interrupted pull never leaves a visible-complete blob behind. CURLOPT_FOLLOWLOCATION is enabled so the common case where a registry 307s blob fetches to S3 / CloudFront with a pre-signed URL works transparently; libcurl strips the Authorization header on cross-host redirects, which is exactly what the storage backend expects. The header parser keys on Content-Type, Docker-Content-Digest, and Www-Authenticate. Content-Type is stripped of charset/parameters before the manifest parser sees it so the canonicalization matches the mediaType field inside the JSON body. Docker-Content-Digest is captured verbatim so the upcoming tag-to-digest pinning in slice 5 can record the registry's resolved digest without recomputing. Response body accumulation has a 16 MiB ceiling (FETCH_BODY_MAX) so an unbounded reply cannot fill memory; real manifests, indexes, and image configs are orders of magnitude below this. Blob responses bypass the buffer entirely and stream straight through the writer. tests/test-oci-fetch.c spawns an in-process HTTP/1.1 mock server bound to 127.0.0.1 on an ephemeral port and drives the fetcher against scripted handlers.
Nine offline cases exercise anonymous manifest GET (body, Content-Type stripping, Docker-Content-Digest capture); manifest 404 surfaces with the right status; bearer challenge runs the full 401 then token then retry sequence and inspects the request log to verify the second hop hits /token and the third carries the Bearer header; cached token reuse on a second fetch confirms no re-challenge round trip; blob success commits a known-good payload to the store; already-cached blob short-circuits with zero server requests; oversize response is rejected and leaves no visible blob; digest mismatch on a correctly-sized payload is rejected at commit; blob 404 fails cleanly. An opt-in tenth case behind OCI_FETCH_ONLINE=1 pulls alpine:3.20 from Docker Hub through the real bearer flow as a smoke test; it is wired as make test-oci-fetch-online and is not part of make check. Makefile adds src/oci/fetch.c to SRCS and -lcurl to HVF_LDFLAGS so the production elfuse binary links libcurl from the macOS SDK (no vendoring per oci-roadmap.md Q7 and Q9). build/test-oci-fetch links libcurl plus pthread for the mock server. mk/config.mk registers the test source in NATIVE_TESTS so the cross-compile pattern rule does not try to aarch64-compile it. mk/tests.mk adds test-oci-fetch as the final stage of make check and exposes test-oci-fetch-online as a separate target. make check stays green: 78 unit tests, busybox 81/0/3, proctitle, procfs-exec, timeout-disable, OCI-ref 34/34, OCI-digest 25/25, OCI-blob-store 14/14, OCI-manifest 76/76, OCI-fetch 9/9. 
--- Makefile | 12 +- mk/config.mk | 2 +- mk/tests.mk | 14 + src/oci/fetch.c | 850 ++++++++++++++++++++++++++++++++++++ src/oci/fetch.h | 128 ++++++ tests/test-oci-fetch.c | 948 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1951 insertions(+), 3 deletions(-) create mode 100644 src/oci/fetch.c create mode 100644 src/oci/fetch.h create mode 100644 tests/test-oci-fetch.c diff --git a/Makefile b/Makefile index 089570b..b22f494 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,8 @@ SRCS := \ oci/digest.c \ oci/blob-store.c \ oci/media-type.c \ - oci/manifest.c + oci/manifest.c \ + oci/fetch.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) @@ -89,7 +90,7 @@ $(CJSON_OBJ): $(CJSON_DIR)/cJSON.c $(CJSON_DIR)/cJSON.h | $(BUILD_DIR) DISPATCH_MANIFEST := src/syscall/dispatch.tbl DISPATCH_GENERATOR := scripts/gen-syscall-dispatch.py DISPATCH_HEADER := $(BUILD_DIR)/dispatch.h -HVF_LDFLAGS := -framework Hypervisor -arch arm64 +HVF_LDFLAGS := -framework Hypervisor -arch arm64 -lcurl # Generated headers under build/ that must exist before compiling sources that # include them. @@ -167,6 +168,13 @@ $(BUILD_DIR)/test-oci-manifest: $(BUILD_DIR)/test-oci-manifest.o $(BUILD_DIR)/oc @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ +## Build the OCI fetch (libcurl) unit test (native macOS, no HVF). Pulls in +## blob-store + digest + manifest models + cJSON; links against system libcurl +## and the platform pthread runtime for the in-process mock HTTP server. +$(BUILD_DIR)/test-oci-fetch: $(BUILD_DIR)/test-oci-fetch.o $(BUILD_DIR)/oci/fetch.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread + # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. 
diff --git a/mk/config.mk b/mk/config.mk index 81b4e69..b42e8f7 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -17,7 +17,7 @@ endif # Exclude native macOS test files from cross-compilation NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c \ tests/test-oci-digest.c tests/test-oci-blob-store.c \ - tests/test-oci-manifest.c + tests/test-oci-manifest.c tests/test-oci-fetch.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index 6dcfe4f..ee0aad3 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -7,6 +7,7 @@ test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \ test-full test-multi-vcpu test-rwx \ test-oci-ref test-oci-digest test-oci-blob-store test-oci-manifest \ + test-oci-fetch test-oci-fetch-online \ test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ @@ -41,6 +42,8 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-oci-blob-store @printf "\n$(BLUE)━━━ OCI manifest parser unit tests ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-oci-manifest + @printf "\n$(BLUE)━━━ OCI fetch unit tests (offline mock HTTP) ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-fetch ## Run the OCI image reference parser unit tests (native, no HVF) test-oci-ref: $(BUILD_DIR)/test-oci-ref @@ -58,6 +61,17 @@ test-oci-blob-store: $(BUILD_DIR)/test-oci-blob-store test-oci-manifest: $(BUILD_DIR)/test-oci-manifest @$(BUILD_DIR)/test-oci-manifest +## Run the OCI fetch unit tests against an in-process mock HTTP server +## (native, no HVF, no network). +test-oci-fetch: $(BUILD_DIR)/test-oci-fetch + @$(BUILD_DIR)/test-oci-fetch + +## Pull alpine:3.20 from Docker Hub anonymously, verify manifest parse and +## blob digests against a real registry. Opt-in; requires network. Not run by +## `make check`. 
+test-oci-fetch-online: $(BUILD_DIR)/test-oci-fetch + @OCI_FETCH_ONLINE=1 $(BUILD_DIR)/test-oci-fetch + test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ trap 'rm -rf "$$tmpdir"; rm -f /tmp/elfuse-sysroot-rename-dst.txt' EXIT; \ diff --git a/src/oci/fetch.c b/src/oci/fetch.c new file mode 100644 index 0000000..07cd2be --- /dev/null +++ b/src/oci/fetch.c @@ -0,0 +1,850 @@ +/* OCI registry HTTPS client + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Implements anonymous and bearer-challenge HTTPS pulls against the OCI + * distribution-spec /v2/ endpoints. Manifest fetches return body bytes plus a + * captured Content-Type and Docker-Content-Digest so the slice-3 parser and + * future tag-to-digest pinning can consume them directly. Blob fetches stream + * the response body into the slice-2 blob store, capping the running byte + * count at the descriptor's declared size and letting the writer's digest + * check reject any payload that hashes to anything other than the descriptor + * hex. + * + * The 401 retry path is "try anonymous first, then parse Www-Authenticate, + * fetch a token, retry once". A second 401 propagates as a fetch failure; the + * caller decides whether to surface authorization-failed or treat it as a + * transient network error. The cached bearer token is invalidated by any 401 + * but otherwise reused across requests on the same fetcher, so a pull of an + * image with N layers makes one token call rather than N+1. + */ + +#include "fetch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../externals/cjson/cJSON.h" + +/* Hard ceiling on a single manifest / index / config response. Real-world + * documents are well under 1 MiB; the limit is here so a misbehaving registry + * cannot fill memory with an unbounded body. Blob responses do not flow + * through this buffer; they stream into the blob store. 
+ */ +#define FETCH_BODY_MAX ((size_t) 16 * 1024 * 1024) + +typedef struct { + char *realm; + char *service; + char *scope; +} bearer_challenge_t; + +struct oci_fetcher { + CURL *easy; + char *base_url_override; + char *bearer_token; + bearer_challenge_t challenge; +}; + +static pthread_once_t g_curl_init_once = PTHREAD_ONCE_INIT; +static int g_curl_init_rc = -1; + +static void curl_global_once(void) +{ + g_curl_init_rc = curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK ? 0 : -1; +} + +int oci_fetch_global_init(void) +{ + pthread_once(&g_curl_init_once, curl_global_once); + if (g_curl_init_rc < 0) + errno = EIO; + return g_curl_init_rc; +} + +void oci_fetch_global_cleanup(void) +{ + /* curl_global_cleanup is not safe under threading. elfuse process lives + * for the duration of one pull so leaving libcurl initialized is fine. + */ +} + +static void bearer_challenge_free(bearer_challenge_t *c) +{ + if (!c) + return; + free(c->realm); + free(c->service); + free(c->scope); + c->realm = NULL; + c->service = NULL; + c->scope = NULL; +} + +oci_fetcher_t *oci_fetcher_new(const oci_fetcher_options_t *opts) +{ + if (oci_fetch_global_init() < 0) + return NULL; + oci_fetcher_t *f = calloc(1, sizeof(*f)); + if (!f) { + errno = ENOMEM; + return NULL; + } + f->easy = curl_easy_init(); + if (!f->easy) { + free(f); + errno = EIO; + return NULL; + } + if (opts && opts->base_url_override) { + f->base_url_override = strdup(opts->base_url_override); + if (!f->base_url_override) { + curl_easy_cleanup(f->easy); + free(f); + errno = ENOMEM; + return NULL; + } + } + return f; +} + +void oci_fetcher_free(oci_fetcher_t *f) +{ + if (!f) + return; + if (f->easy) + curl_easy_cleanup(f->easy); + free(f->base_url_override); + free(f->bearer_token); + bearer_challenge_free(&f->challenge); + free(f); +} + +void oci_fetch_response_free(oci_fetch_response_t *r) +{ + if (!r) + return; + free(r->body); + free(r->content_type); + free(r->docker_content_digest); + r->body = NULL; + r->content_type = 
NULL; + r->docker_content_digest = NULL; + r->body_len = 0; + r->http_status = 0; +} + +/* docker.io is the canonical registry name from the reference parser; the + * actual API host is registry-1.docker.io. Every other registry (ghcr.io, + * quay.io, public.ecr.aws, mirrors) uses its own host directly. + */ +static const char *api_host_for_registry(const char *reg) +{ + if (reg && !strcmp(reg, "docker.io")) + return "registry-1.docker.io"; + return reg; +} + +static char *build_base_url(const oci_fetcher_t *f, const oci_ref_t *ref) +{ + if (f->base_url_override) + return strdup(f->base_url_override); + const char *host = api_host_for_registry(ref->registry); + if (!host) + return NULL; + size_t n = strlen(host) + sizeof("https://"); + char *url = malloc(n); + if (!url) + return NULL; + snprintf(url, n, "https://%s", host); + return url; +} + +static char *build_manifest_url(const oci_fetcher_t *f, + const oci_ref_t *ref, + const char *selector) +{ + char *base = build_base_url(f, ref); + if (!base) + return NULL; + size_t n = strlen(base) + strlen(ref->repository) + strlen(selector) + + sizeof("/v2//manifests/"); + char *url = malloc(n); + if (!url) { + free(base); + return NULL; + } + snprintf(url, n, "%s/v2/%s/manifests/%s", base, ref->repository, selector); + free(base); + return url; +} + +static char *build_blob_url(const oci_fetcher_t *f, + const oci_ref_t *ref, + const char *digest_str) +{ + char *base = build_base_url(f, ref); + if (!base) + return NULL; + size_t n = strlen(base) + strlen(ref->repository) + strlen(digest_str) + + sizeof("/v2//blobs/"); + char *url = malloc(n); + if (!url) { + free(base); + return NULL; + } + snprintf(url, n, "%s/v2/%s/blobs/%s", base, ref->repository, digest_str); + free(base); + return url; +} + +typedef struct { + char *buf; + size_t len; + size_t cap; + size_t max; + bool overflow; +} body_buf_t; + +static size_t body_write_cb(char *ptr, size_t size, size_t nmemb, void *userdata) +{ + body_buf_t *b = userdata; + size_t 
n = size * nmemb; + if (b->overflow) + return 0; + if (b->len + n + 1 > b->max) { + b->overflow = true; + return 0; + } + if (b->len + n + 1 > b->cap) { + size_t newcap = b->cap ? b->cap : 4096; + while (newcap < b->len + n + 1) + newcap *= 2; + if (newcap > b->max + 1) + newcap = b->max + 1; + char *r = realloc(b->buf, newcap); + if (!r) { + b->overflow = true; + return 0; + } + b->buf = r; + b->cap = newcap; + } + memcpy(b->buf + b->len, ptr, n); + b->len += n; + b->buf[b->len] = '\0'; + return n; +} + +static char *trim_inplace(char *s) +{ + if (!s) + return NULL; + while (*s && isspace((unsigned char) *s)) + s++; + size_t n = strlen(s); + while (n > 0 && isspace((unsigned char) s[n - 1])) { + s[n - 1] = '\0'; + n--; + } + return s; +} + +static char *match_header(char *line, const char *key) +{ + size_t klen = strlen(key); + if (strncasecmp(line, key, klen) != 0) + return NULL; + if (line[klen] != ':') + return NULL; + char *v = line + klen + 1; + while (*v == ' ' || *v == '\t') + v++; + return v; +} + +static char *strdup_range(const char *s, const char *end) +{ + size_t n = (size_t) (end - s); + char *r = malloc(n + 1); + if (!r) + return NULL; + memcpy(r, s, n); + r[n] = '\0'; + return r; +} + +/* Parse a Bearer challenge value into realm/service/scope. Accepts unquoted + * values too (some test fixtures and a few private registries skip the + * quotes). Returns 0 on success or -1 on malformed input. On success *out is + * fully owned by the caller; any prior contents are freed. 
+ */ +static int parse_bearer_challenge(const char *value, bearer_challenge_t *out) +{ + bearer_challenge_t tmp = {0}; + const char *p = value; + while (*p == ' ' || *p == '\t') + p++; + if (strncasecmp(p, "Bearer", 6) != 0) + return -1; + p += 6; + while (*p == ' ' || *p == '\t') + p++; + while (*p) { + const char *key_start = p; + while (*p && *p != '=' && *p != ',') + p++; + if (*p != '=') { + bearer_challenge_free(&tmp); + return -1; + } + const char *key_end = p; + p++; + char *value_str; + if (*p == '"') { + p++; + const char *vstart = p; + while (*p && *p != '"') + p++; + if (*p != '"') { + bearer_challenge_free(&tmp); + return -1; + } + value_str = strdup_range(vstart, p); + p++; + } else { + const char *vstart = p; + while (*p && *p != ',') + p++; + value_str = strdup_range(vstart, p); + } + if (!value_str) { + bearer_challenge_free(&tmp); + return -1; + } + size_t klen = (size_t) (key_end - key_start); + char **target = NULL; + if (klen == 5 && !strncasecmp(key_start, "realm", 5)) + target = &tmp.realm; + else if (klen == 7 && !strncasecmp(key_start, "service", 7)) + target = &tmp.service; + else if (klen == 5 && !strncasecmp(key_start, "scope", 5)) + target = &tmp.scope; + if (target) { + free(*target); + *target = value_str; + } else { + free(value_str); + } + while (*p == ',' || *p == ' ' || *p == '\t') + p++; + } + if (!tmp.realm) { + bearer_challenge_free(&tmp); + return -1; + } + bearer_challenge_free(out); + *out = tmp; + return 0; +} + +typedef struct { + char *content_type; + char *docker_content_digest; + bearer_challenge_t *challenge_out; +} headers_ctx_t; + +static size_t header_cb(char *buffer, size_t size, size_t nitems, void *userdata) +{ + headers_ctx_t *ctx = userdata; + size_t n = size * nitems; + size_t total = n; + if (n == 0 || n >= 4096) + return total; + char line[4096]; + memcpy(line, buffer, n); + line[n] = '\0'; + while (n > 0 && (line[n - 1] == '\r' || line[n - 1] == '\n')) + line[--n] = '\0'; + if (n == 0) + return total; + + 
char *v = match_header(line, "Content-Type"); + if (v) { + v = trim_inplace(v); + char *semi = strchr(v, ';'); + if (semi) + *semi = '\0'; + v = trim_inplace(v); + free(ctx->content_type); + ctx->content_type = strdup(v); + return total; + } + v = match_header(line, "Docker-Content-Digest"); + if (v) { + v = trim_inplace(v); + free(ctx->docker_content_digest); + ctx->docker_content_digest = strdup(v); + return total; + } + if (ctx->challenge_out) { + v = match_header(line, "Www-Authenticate"); + if (v) { + v = trim_inplace(v); + (void) parse_bearer_challenge(v, ctx->challenge_out); + } + } + return total; +} + +static struct curl_slist *build_request_headers(const oci_fetcher_t *f, + const char *const *accept_types) +{ + struct curl_slist *hdrs = NULL; + if (accept_types) { + for (const char *const *p = accept_types; *p; p++) { + char hdr[256]; + snprintf(hdr, sizeof(hdr), "Accept: %s", *p); + hdrs = curl_slist_append(hdrs, hdr); + } + } + if (f->bearer_token) { + size_t n = strlen(f->bearer_token) + sizeof("Authorization: Bearer "); + char *hdr = malloc(n); + if (hdr) { + snprintf(hdr, n, "Authorization: Bearer %s", f->bearer_token); + hdrs = curl_slist_append(hdrs, hdr); + free(hdr); + } + } + return hdrs; +} + +static int fetch_token(oci_fetcher_t *f, const char **err_msg) +{ + if (!f->challenge.realm) { + if (err_msg) + *err_msg = "no bearer realm to fetch token from"; + errno = EINVAL; + return -1; + } + + char *enc_service = f->challenge.service + ? curl_easy_escape(f->easy, f->challenge.service, 0) + : NULL; + char *enc_scope = f->challenge.scope + ? curl_easy_escape(f->easy, f->challenge.scope, 0) + : NULL; + size_t n = strlen(f->challenge.realm) + + (enc_service ? strlen(enc_service) + 16 : 0) + + (enc_scope ? 
strlen(enc_scope) + 16 : 0) + 2; + char *url = malloc(n); + if (!url) { + curl_free(enc_service); + curl_free(enc_scope); + if (err_msg) + *err_msg = "out of memory"; + errno = ENOMEM; + return -1; + } + int len = snprintf(url, n, "%s", f->challenge.realm); + char sep = strchr(f->challenge.realm, '?') ? '&' : '?'; + if (enc_service) { + len += snprintf(url + len, n - (size_t) len, "%cservice=%s", sep, + enc_service); + sep = '&'; + } + if (enc_scope) { + snprintf(url + len, n - (size_t) len, "%cscope=%s", sep, enc_scope); + } + curl_free(enc_service); + curl_free(enc_scope); + + body_buf_t body = {.max = FETCH_BODY_MAX}; + headers_ctx_t hctx = {0}; + curl_easy_reset(f->easy); + curl_easy_setopt(f->easy, CURLOPT_URL, url); + curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); + curl_easy_setopt(f->easy, CURLOPT_USERAGENT, "elfuse-oci/1"); + curl_easy_setopt(f->easy, CURLOPT_WRITEFUNCTION, body_write_cb); + curl_easy_setopt(f->easy, CURLOPT_WRITEDATA, &body); + curl_easy_setopt(f->easy, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt(f->easy, CURLOPT_HEADERDATA, &hctx); + + CURLcode rc = curl_easy_perform(f->easy); + long status = 0; + curl_easy_getinfo(f->easy, CURLINFO_RESPONSE_CODE, &status); + free(url); + free(hctx.content_type); + free(hctx.docker_content_digest); + + if (rc != CURLE_OK) { + free(body.buf); + if (err_msg) + *err_msg = curl_easy_strerror(rc); + errno = EIO; + return -1; + } + if (status < 200 || status >= 300) { + free(body.buf); + if (err_msg) + *err_msg = "token endpoint returned non-2xx status"; + errno = EPROTO; + return -1; + } + if (!body.buf || body.len == 0) { + free(body.buf); + if (err_msg) + *err_msg = "token endpoint returned empty body"; + errno = EPROTO; + return -1; + } + + cJSON *json = cJSON_ParseWithLength(body.buf, body.len); + free(body.buf); + if (!json) { + if (err_msg) + *err_msg = "token endpoint returned invalid JSON"; + errno = EPROTO; + return -1; + } + 
cJSON *t = cJSON_GetObjectItemCaseSensitive(json, "token"); + if (!cJSON_IsString(t) || !t->valuestring) + t = cJSON_GetObjectItemCaseSensitive(json, "access_token"); + if (!cJSON_IsString(t) || !t->valuestring) { + cJSON_Delete(json); + if (err_msg) + *err_msg = "token endpoint response lacks 'token' field"; + errno = EPROTO; + return -1; + } + free(f->bearer_token); + f->bearer_token = strdup(t->valuestring); + cJSON_Delete(json); + if (!f->bearer_token) { + if (err_msg) + *err_msg = "out of memory caching token"; + errno = ENOMEM; + return -1; + } + return 0; +} + +static int perform_manifest_get(oci_fetcher_t *f, + const char *url, + const char *const *accept_types, + oci_fetch_response_t *out, + bearer_challenge_t *challenge_out, + const char **err_msg) +{ + body_buf_t body = {.max = FETCH_BODY_MAX}; + headers_ctx_t hctx = {.challenge_out = challenge_out}; + if (challenge_out) + bearer_challenge_free(challenge_out); + + curl_easy_reset(f->easy); + curl_easy_setopt(f->easy, CURLOPT_URL, url); + curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); + curl_easy_setopt(f->easy, CURLOPT_USERAGENT, "elfuse-oci/1"); + curl_easy_setopt(f->easy, CURLOPT_WRITEFUNCTION, body_write_cb); + curl_easy_setopt(f->easy, CURLOPT_WRITEDATA, &body); + curl_easy_setopt(f->easy, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt(f->easy, CURLOPT_HEADERDATA, &hctx); + struct curl_slist *hdrs = build_request_headers(f, accept_types); + if (hdrs) + curl_easy_setopt(f->easy, CURLOPT_HTTPHEADER, hdrs); + + CURLcode rc = curl_easy_perform(f->easy); + long status = 0; + curl_easy_getinfo(f->easy, CURLINFO_RESPONSE_CODE, &status); + if (hdrs) + curl_slist_free_all(hdrs); + + out->http_status = status; + if (rc != CURLE_OK) { + free(body.buf); + free(hctx.content_type); + free(hctx.docker_content_digest); + if (err_msg) + *err_msg = curl_easy_strerror(rc); + errno = EIO; + return -1; + } + if (body.overflow) { + free(body.buf); + 
free(hctx.content_type); + free(hctx.docker_content_digest); + if (err_msg) + *err_msg = "response body exceeded max size"; + errno = EFBIG; + return -1; + } + out->body = body.buf; + out->body_len = body.len; + out->content_type = hctx.content_type; + out->docker_content_digest = hctx.docker_content_digest; + return 0; +} + +int oci_fetch_manifest(oci_fetcher_t *f, + const oci_ref_t *ref, + const char *digest_or_tag, + const char *const *accept_types, + oci_fetch_response_t *out, + const char **err_msg) +{ + if (!f || !ref || !out) { + if (err_msg) + *err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + memset(out, 0, sizeof(*out)); + const char *selector = digest_or_tag; + if (!selector) + selector = ref->digest; + if (!selector) + selector = ref->tag; + if (!selector) { + if (err_msg) + *err_msg = "reference has no tag or digest"; + errno = EINVAL; + return -1; + } + char *url = build_manifest_url(f, ref, selector); + if (!url) { + if (err_msg) + *err_msg = "out of memory"; + errno = ENOMEM; + return -1; + } + + bearer_challenge_t challenge = {0}; + int rc = perform_manifest_get(f, url, accept_types, out, + f->bearer_token ? 
NULL : &challenge, + err_msg); + if (rc < 0) { + free(url); + bearer_challenge_free(&challenge); + return -1; + } + + if (out->http_status == 401 && challenge.realm) { + bearer_challenge_free(&f->challenge); + f->challenge = challenge; + memset(&challenge, 0, sizeof(challenge)); + oci_fetch_response_free(out); + memset(out, 0, sizeof(*out)); + if (fetch_token(f, err_msg) < 0) { + free(url); + return -1; + } + rc = perform_manifest_get(f, url, accept_types, out, NULL, err_msg); + if (rc < 0) { + free(url); + return -1; + } + } else { + bearer_challenge_free(&challenge); + } + + free(url); + + if (out->http_status < 200 || out->http_status >= 300) { + if (err_msg) + *err_msg = "manifest fetch returned non-2xx status"; + errno = EPROTO; + return -1; + } + return 0; +} + +typedef struct { + oci_blob_writer_t *w; + int64_t bytes_seen; + int64_t bytes_expected; + bool overflow; + bool write_failed; +} blob_stream_ctx_t; + +static size_t blob_stream_cb(char *ptr, size_t size, size_t nmemb, void *userdata) +{ + blob_stream_ctx_t *ctx = userdata; + size_t n = size * nmemb; + if (ctx->overflow || ctx->write_failed) + return 0; + int64_t projected = ctx->bytes_seen + (int64_t) n; + if (projected > ctx->bytes_expected) { + ctx->overflow = true; + return 0; + } + if (!oci_blob_writer_write(ctx->w, ptr, n)) { + ctx->write_failed = true; + return 0; + } + ctx->bytes_seen = projected; + return n; +} + +static int perform_blob_get(oci_fetcher_t *f, + const char *url, + blob_stream_ctx_t *bctx, + long *out_status, + bearer_challenge_t *challenge_out, + const char **err_msg) +{ + headers_ctx_t hctx = {.challenge_out = challenge_out}; + if (challenge_out) + bearer_challenge_free(challenge_out); + + curl_easy_reset(f->easy); + curl_easy_setopt(f->easy, CURLOPT_URL, url); + curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); + curl_easy_setopt(f->easy, CURLOPT_USERAGENT, "elfuse-oci/1"); + curl_easy_setopt(f->easy, 
CURLOPT_WRITEFUNCTION, blob_stream_cb); + curl_easy_setopt(f->easy, CURLOPT_WRITEDATA, bctx); + curl_easy_setopt(f->easy, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt(f->easy, CURLOPT_HEADERDATA, &hctx); + struct curl_slist *hdrs = build_request_headers(f, NULL); + if (hdrs) + curl_easy_setopt(f->easy, CURLOPT_HTTPHEADER, hdrs); + + CURLcode rc = curl_easy_perform(f->easy); + long status = 0; + curl_easy_getinfo(f->easy, CURLINFO_RESPONSE_CODE, &status); + if (hdrs) + curl_slist_free_all(hdrs); + free(hctx.content_type); + free(hctx.docker_content_digest); + + *out_status = status; + if (rc != CURLE_OK) { + if (bctx->overflow) { + if (err_msg) + *err_msg = "blob exceeded declared size"; + errno = EPROTO; + return -1; + } + if (bctx->write_failed) { + if (err_msg) + *err_msg = "blob writer rejected payload"; + errno = EIO; + return -1; + } + if (err_msg) + *err_msg = curl_easy_strerror(rc); + errno = EIO; + return -1; + } + return 0; +} + +int oci_fetch_blob(oci_fetcher_t *f, + const oci_ref_t *ref, + const oci_descriptor_t *desc, + oci_blob_store_t *store, + const char **err_msg) +{ + if (!f || !ref || !desc || !store) { + if (err_msg) + *err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + if (desc->size < 0) { + if (err_msg) + *err_msg = "descriptor size is negative"; + errno = EINVAL; + return -1; + } + if (oci_blob_store_has(store, desc->algo, desc->hex)) + return 0; + + char *url = build_blob_url(f, ref, desc->digest_str); + if (!url) { + if (err_msg) + *err_msg = "out of memory"; + errno = ENOMEM; + return -1; + } + + oci_blob_writer_t *w = oci_blob_writer_begin(store, desc->algo, desc->hex); + if (!w) { + free(url); + if (err_msg) + *err_msg = "failed to start blob writer"; + return -1; + } + blob_stream_ctx_t bctx = {.w = w, .bytes_expected = desc->size}; + + bearer_challenge_t challenge = {0}; + long status = 0; + int rc = perform_blob_get(f, url, &bctx, &status, + f->bearer_token ? 
NULL : &challenge, err_msg); + if (rc < 0) { + free(url); + oci_blob_writer_abort(w); + bearer_challenge_free(&challenge); + return -1; + } + + if (status == 401 && challenge.realm) { + oci_blob_writer_abort(w); + bearer_challenge_free(&f->challenge); + f->challenge = challenge; + memset(&challenge, 0, sizeof(challenge)); + if (fetch_token(f, err_msg) < 0) { + free(url); + return -1; + } + w = oci_blob_writer_begin(store, desc->algo, desc->hex); + if (!w) { + free(url); + if (err_msg) + *err_msg = "failed to restart blob writer"; + return -1; + } + bctx = (blob_stream_ctx_t){.w = w, .bytes_expected = desc->size}; + rc = perform_blob_get(f, url, &bctx, &status, NULL, err_msg); + if (rc < 0) { + free(url); + oci_blob_writer_abort(w); + return -1; + } + } else { + bearer_challenge_free(&challenge); + } + + free(url); + + if (status < 200 || status >= 300) { + oci_blob_writer_abort(w); + if (err_msg) + *err_msg = "blob fetch returned non-2xx status"; + errno = EPROTO; + return -1; + } + if (bctx.bytes_seen != desc->size) { + oci_blob_writer_abort(w); + if (err_msg) + *err_msg = "blob size mismatch"; + errno = EPROTO; + return -1; + } + if (oci_blob_writer_commit(w) < 0) { + if (err_msg) + *err_msg = "blob digest mismatch on commit"; + return -1; + } + return 0; +} diff --git a/src/oci/fetch.h b/src/oci/fetch.h new file mode 100644 index 0000000..a802abe --- /dev/null +++ b/src/oci/fetch.h @@ -0,0 +1,128 @@ +/* OCI registry HTTPS client + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Wraps libcurl for the subset of the OCI distribution-spec that elfuse needs + * to pull an image: + * + * - Anonymous GET against /v2/<name>/manifests/<reference> and /v2/<name>/blobs/<digest> + * - 401 + Www-Authenticate: Bearer challenge: fetch a token from the realm + * advertised by the registry, then retry the original request with + * Authorization: Bearer <token> + * - Blob streaming: pipe the response body into the slice-2 blob store with + * digest and declared-size 
verification, so a hostile or truncated layer + * never produces a visible-complete blob + * + * Future slices extend the options struct with basic auth credentials, + * custom CA bundle, and a loopback-gated TLS verify-off path + * (oci-roadmap.md Q7 ship list). The public entry points stay stable. + * + * Thread safety: oci_fetch_global_init must run once before any fetcher is + * created. Each oci_fetcher_t holds its own libcurl easy handle and is not + * safe to share across threads; create one per worker. + */ + +#pragma once + +#include +#include + +#include "blob-store.h" +#include "manifest.h" +#include "ref.h" + +typedef struct { + /* Optional override of the registry base URL. When non-NULL, the fetcher + * uses this prefix for every /v2/... request instead of computing one + * from ref->registry. Test scaffolding sets this to a local mock + * (http://127.0.0.1:<port>); production callers leave it NULL. + * + * Reserved for slice 4b: username, password, ca_file, allow_insecure. + * Treat any unset future field as NULL/false. + */ + const char *base_url_override; +} oci_fetcher_options_t; + +typedef struct oci_fetcher oci_fetcher_t; + +/* Per-process libcurl global init. Safe to call multiple times; only the + * first call performs work. Returns 0 on success or -1 with errno=EIO if + * libcurl rejects the initialization. + */ +int oci_fetch_global_init(void); + +/* Counterpart of oci_fetch_global_init. The caller may invoke it on shutdown + * but elfuse runs short enough that leaving libcurl initialized until process + * exit is acceptable. + */ +void oci_fetch_global_cleanup(void); + +/* Allocate a fetcher. opts may be NULL for defaults. Returns NULL on + * allocation failure with errno preserved. + */ +oci_fetcher_t *oci_fetcher_new(const oci_fetcher_options_t *opts); + +/* Release the fetcher. Safe on NULL. */ +void oci_fetcher_free(oci_fetcher_t *f); + +typedef struct { + /* Heap-allocated response body. 
NUL-terminated so callers can pass it + * directly to JSON parsers that expect a C string, while body_len is the + * authoritative byte count. + */ + char *body; + size_t body_len; + /* Content-Type header value with parameters stripped (everything before + * the first ';'). NULL if the server omitted the header. + */ + char *content_type; + /* Docker-Content-Digest header value verbatim, e.g. "sha256:abc...". + * NULL if the server omitted it. Useful for tag-to-digest pinning. + */ + char *docker_content_digest; + long http_status; +} oci_fetch_response_t; + +/* Release any heap fields. Safe on a zero-initialised struct. */ +void oci_fetch_response_free(oci_fetch_response_t *r); + +/* Fetch a manifest, image index, or image config blob by reference. + * + * ref registry/repository, plus optional default tag/digest + * digest_or_tag the actual GET selector ("sha256:..." or a tag string). + * NULL means: use ref->digest if set, otherwise ref->tag. + * accept_types NULL-terminated list of media types to advertise in the + * Accept header. Pass NULL to suppress the Accept header. + * + * On success returns 0 and fills *out (caller frees via + * oci_fetch_response_free). On HTTP error (non-2xx) returns -1 with + * out->http_status populated and errno=EPROTO; the body may still be present + * for diagnostics. On transport / auth failure returns -1 with errno + * preserved and *err_msg (when non-NULL) pointing at a static description. + */ +int oci_fetch_manifest(oci_fetcher_t *f, + const oci_ref_t *ref, + const char *digest_or_tag, + const char *const *accept_types, + oci_fetch_response_t *out, + const char **err_msg); + +/* Fetch a blob into the local store. 
The descriptor's algo, hex, and size + * fields drive verification: incoming bytes feed an oci_blob_writer keyed by + * the digest, the running byte count is capped at desc->size so a hostile + * server cannot stream forever, and the writer's own digest check at commit + * rejects any payload that hashes to anything other than desc->hex. + * + * Returns 0 on success, -1 with errno set on failure. err_msg points at a + * static description for the common diagnostic modes (digest mismatch, + * size mismatch, transport error, HTTP status). + * + * Already-present blobs are an immediate success (store-side has() check) + * with no network call. + */ +int oci_fetch_blob(oci_fetcher_t *f, + const oci_ref_t *ref, + const oci_descriptor_t *desc, + oci_blob_store_t *store, + const char **err_msg); diff --git a/tests/test-oci-fetch.c b/tests/test-oci-fetch.c new file mode 100644 index 0000000..eca4968 --- /dev/null +++ b/tests/test-oci-fetch.c @@ -0,0 +1,948 @@ +/* OCI registry HTTPS client unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Spawns a single-threaded HTTP/1.1 mock server on 127.0.0.1:<port> and + * drives oci_fetch_manifest / oci_fetch_blob against it. The mock server is + * scripted per request via a handler function pointer: each test installs the + * behavior it wants (200 OK, 401 with bearer challenge, 404, oversize blob, + * etc.) and verifies the response captured by the fetcher plus side effects + * in a temporary blob store directory. + * + * No real network is touched. The optional OCI_FETCH_ONLINE=1 environment + * variable enables a single additional case that pulls alpine:3.20 from + * Docker Hub anonymously; that path is gated behind make test-oci-fetch-online + * and is not part of make check. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oci/blob-store.h" +#include "oci/digest.h" +#include "oci/fetch.h" +#include "oci/manifest.h" +#include "oci/ref.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int g_total = 0; +static int g_passed = 0; + +static void report_pass(const char *name) +{ + g_total++; + g_passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static void report_fail(const char *name, const char *fmt, ...) +{ + g_total++; + printf(" " RED "FAIL" RESET " %s", name); + if (fmt && *fmt) { + printf(": "); + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + } + printf("\n"); +} + +/* ── Mock HTTP server ────────────────────────────────────────────── */ + +typedef struct { + char method[8]; + char path[1024]; + char authorization[1024]; + char accept[1024]; +} mock_request_t; + +#define MOCK_LOG_MAX 16 + +typedef struct mock_server mock_server_t; +typedef void (*mock_handler_t)(mock_server_t *s, int fd, + const mock_request_t *req); + +struct mock_server { + int listen_fd; + int port; + pthread_t thread; + pthread_mutex_t lock; + bool stop; + int n_requests; + mock_request_t log[MOCK_LOG_MAX]; + mock_handler_t handler; + void *ctx; +}; + +static ssize_t read_all_until_empty(int fd, char *buf, size_t cap) +{ + size_t off = 0; + while (off + 1 < cap) { + ssize_t n = read(fd, buf + off, cap - 1 - off); + if (n <= 0) + break; + off += (size_t) n; + buf[off] = '\0'; + if (strstr(buf, "\r\n\r\n")) + break; + } + return (ssize_t) off; +} + +static void parse_request(const char *raw, mock_request_t *out) +{ + memset(out, 0, sizeof(*out)); + /* Request line: METHOD SP path SP HTTP/x */ + const char *sp1 = strchr(raw, ' '); + if (!sp1) + return; + size_t mlen = 
(size_t) (sp1 - raw); + if (mlen >= sizeof(out->method)) + mlen = sizeof(out->method) - 1; + memcpy(out->method, raw, mlen); + const char *sp2 = strchr(sp1 + 1, ' '); + if (!sp2) + return; + size_t plen = (size_t) (sp2 - sp1 - 1); + if (plen >= sizeof(out->path)) + plen = sizeof(out->path) - 1; + memcpy(out->path, sp1 + 1, plen); + + /* Header scan. */ + const char *line = strstr(raw, "\r\n"); + if (!line) + return; + line += 2; + while (*line && strncmp(line, "\r\n", 2) != 0) { + const char *eol = strstr(line, "\r\n"); + if (!eol) + break; + size_t llen = (size_t) (eol - line); + if (llen > 13 && !strncasecmp(line, "Authorization:", 14)) { + const char *v = line + 14; + while (*v == ' ') + v++; + size_t vlen = (size_t) (eol - v); + if (vlen >= sizeof(out->authorization)) + vlen = sizeof(out->authorization) - 1; + memcpy(out->authorization, v, vlen); + out->authorization[vlen] = '\0'; + } else if (llen > 6 && !strncasecmp(line, "Accept:", 7)) { + const char *v = line + 7; + while (*v == ' ') + v++; + size_t vlen = (size_t) (eol - v); + if (vlen >= sizeof(out->accept)) + vlen = sizeof(out->accept) - 1; + memcpy(out->accept, v, vlen); + out->accept[vlen] = '\0'; + } + line = eol + 2; + } +} + +static void *mock_server_loop(void *arg) +{ + mock_server_t *s = arg; + while (1) { + pthread_mutex_lock(&s->lock); + bool stop = s->stop; + pthread_mutex_unlock(&s->lock); + if (stop) + break; + int cfd = accept(s->listen_fd, NULL, NULL); + if (cfd < 0) { + if (errno == EINTR) + continue; + break; + } + char buf[8192]; + ssize_t got = read_all_until_empty(cfd, buf, sizeof(buf)); + if (got <= 0) { + close(cfd); + continue; + } + mock_request_t req; + parse_request(buf, &req); + + pthread_mutex_lock(&s->lock); + if (s->n_requests < MOCK_LOG_MAX) { + s->log[s->n_requests++] = req; + } + mock_handler_t h = s->handler; + pthread_mutex_unlock(&s->lock); + + if (h) + h(s, cfd, &req); + close(cfd); + } + return NULL; +} + +static int mock_server_start(mock_server_t *s) +{ + memset(s, 
0, sizeof(*s)); + pthread_mutex_init(&s->lock, NULL); + s->listen_fd = socket(AF_INET, SOCK_STREAM, 0); + if (s->listen_fd < 0) + return -1; + int yes = 1; + setsockopt(s->listen_fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = 0, + }; + if (bind(s->listen_fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { + close(s->listen_fd); + return -1; + } + socklen_t slen = sizeof(sa); + if (getsockname(s->listen_fd, (struct sockaddr *) &sa, &slen) < 0) { + close(s->listen_fd); + return -1; + } + s->port = ntohs(sa.sin_port); + if (listen(s->listen_fd, 8) < 0) { + close(s->listen_fd); + return -1; + } + if (pthread_create(&s->thread, NULL, mock_server_loop, s) != 0) { + close(s->listen_fd); + return -1; + } + return 0; +} + +static void mock_server_stop(mock_server_t *s) +{ + pthread_mutex_lock(&s->lock); + s->stop = true; + pthread_mutex_unlock(&s->lock); + /* Unblock the accept by connecting to ourselves. */ + int wake = socket(AF_INET, SOCK_STREAM, 0); + if (wake >= 0) { + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(s->port), + }; + (void) connect(wake, (struct sockaddr *) &sa, sizeof(sa)); + close(wake); + } + pthread_join(s->thread, NULL); + close(s->listen_fd); + pthread_mutex_destroy(&s->lock); +} + +static void mock_set_handler(mock_server_t *s, mock_handler_t h, void *ctx) +{ + pthread_mutex_lock(&s->lock); + s->handler = h; + s->ctx = ctx; + s->n_requests = 0; + memset(s->log, 0, sizeof(s->log)); + pthread_mutex_unlock(&s->lock); +} + +static void mock_send_full(int fd, int status, const char *status_text, + const char *content_type, + const char *www_authenticate, + const char *docker_digest, + const void *body, + size_t body_len) +{ + char header[1024]; + int n = snprintf(header, sizeof(header), + "HTTP/1.1 %d %s\r\n" + "Content-Length: %zu\r\n", + status, status_text ? 
status_text : "OK", body_len); + if (content_type) + n += snprintf(header + n, sizeof(header) - (size_t) n, + "Content-Type: %s\r\n", content_type); + if (www_authenticate) + n += snprintf(header + n, sizeof(header) - (size_t) n, + "Www-Authenticate: %s\r\n", www_authenticate); + if (docker_digest) + n += snprintf(header + n, sizeof(header) - (size_t) n, + "Docker-Content-Digest: %s\r\n", docker_digest); + n += snprintf(header + n, sizeof(header) - (size_t) n, "\r\n"); + (void) !write(fd, header, (size_t) n); + if (body_len > 0) + (void) !write(fd, body, body_len); +} + +/* ── Helpers ─────────────────────────────────────────────────────── */ + +static int remove_entry(const char *path, const struct stat *st, int typeflag, + struct FTW *ftwbuf) +{ + (void) st; + (void) typeflag; + (void) ftwbuf; + return remove(path); +} + +static void wipe_dir(const char *root) +{ + (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS); +} + +static char *make_scratch_root(void) +{ + char *tmpl = strdup("/tmp/elfuse-oci-fetch-XXXXXX"); + if (!tmpl || !mkdtemp(tmpl)) { + free(tmpl); + return NULL; + } + return tmpl; +} + +static char *make_base_url(int port) +{ + char *url = malloc(64); + if (!url) + return NULL; + snprintf(url, 64, "http://127.0.0.1:%d", port); + return url; +} + +static void fill_descriptor(oci_descriptor_t *desc, + char *digest_str_buf, size_t digest_str_cap, + oci_digest_algo_t algo, const char *hex, + int64_t size, oci_media_type_t mt) +{ + memset(desc, 0, sizeof(*desc)); + desc->algo = algo; + snprintf(digest_str_buf, digest_str_cap, "%s:%s", + oci_digest_algo_name(algo), hex); + desc->digest_str = digest_str_buf; + memcpy(desc->hex, hex, strlen(hex) + 1); + desc->size = size; + desc->media_type = mt; +} + +/* ── Handlers ────────────────────────────────────────────────────── */ + +typedef struct { + const char *manifest_path; + const char *body; + size_t body_len; + const char *content_type; + const char *docker_digest; +} handler_anonymous_manifest_t; + 
+static void h_anonymous_manifest(mock_server_t *s, int fd, + const mock_request_t *req) +{ + handler_anonymous_manifest_t *ctx = s->ctx; + if (strcmp(req->path, ctx->manifest_path) == 0) { + mock_send_full(fd, 200, "OK", ctx->content_type, NULL, ctx->docker_digest, + ctx->body, ctx->body_len); + return; + } + mock_send_full(fd, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); +} + +typedef struct { + const char *manifest_path; + const char *expected_token; + const char *manifest_body; + size_t manifest_body_len; + const char *content_type; + char base_url[64]; +} handler_bearer_t; + +static void h_bearer_flow(mock_server_t *s, int fd, const mock_request_t *req) +{ + handler_bearer_t *ctx = s->ctx; + if (strncmp(req->path, "/token", 6) == 0) { + char body[256]; + int n = snprintf(body, sizeof(body), + "{\"token\":\"%s\",\"expires_in\":300}", + ctx->expected_token); + mock_send_full(fd, 200, "OK", "application/json", NULL, NULL, body, + (size_t) n); + return; + } + if (strcmp(req->path, ctx->manifest_path) == 0) { + char want_auth[256]; + snprintf(want_auth, sizeof(want_auth), "Bearer %s", ctx->expected_token); + if (strcmp(req->authorization, want_auth) == 0) { + mock_send_full(fd, 200, "OK", ctx->content_type, NULL, NULL, + ctx->manifest_body, ctx->manifest_body_len); + return; + } + char challenge[512]; + snprintf(challenge, sizeof(challenge), + "Bearer realm=\"%s/token\",service=\"reg\"," + "scope=\"repository:private/secret:pull\"", + ctx->base_url); + mock_send_full(fd, 401, "Unauthorized", "application/json", challenge, + NULL, "{}", 2); + return; + } + mock_send_full(fd, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); +} + +typedef struct { + const char *blob_path; + const void *body; + size_t body_len; + int status; /* override; 0 = 200 */ + bool oversize; /* if true, send body_len + 5 bytes */ +} handler_blob_t; + +static void h_blob(mock_server_t *s, int fd, const mock_request_t *req) +{ + handler_blob_t *ctx = s->ctx; + if 
(strcmp(req->path, ctx->blob_path) != 0) { + mock_send_full(fd, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + return; + } + int status = ctx->status ? ctx->status : 200; + if (status != 200) { + mock_send_full(fd, status, "Error", "text/plain", NULL, NULL, "err", 3); + return; + } + if (ctx->oversize) { + size_t pad_len = ctx->body_len + 5; + char *buf = malloc(pad_len); + memcpy(buf, ctx->body, ctx->body_len); + memset(buf + ctx->body_len, 'X', 5); + mock_send_full(fd, 200, "OK", "application/octet-stream", NULL, NULL, + buf, pad_len); + free(buf); + return; + } + mock_send_full(fd, 200, "OK", "application/octet-stream", NULL, NULL, + ctx->body, ctx->body_len); +} + +/* ── Tests ───────────────────────────────────────────────────────── */ + +static void test_anonymous_manifest(mock_server_t *server, oci_fetcher_t *f) +{ + static const char BODY[] = "{\"schemaVersion\":2}"; + static const char DIGEST[] = + "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/library/alpine/manifests/3.20", + .body = BODY, + .body_len = strlen(BODY), + .content_type = "application/vnd.oci.image.manifest.v1+json", + .docker_digest = DIGEST, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("anonymous manifest GET", "rc=%d err=%s", rc, + err ? 
err : "(none)"); + } else if (resp.http_status != 200) { + report_fail("anonymous manifest GET", "status=%ld", resp.http_status); + } else if (resp.body_len != strlen(BODY) || + memcmp(resp.body, BODY, resp.body_len) != 0) { + report_fail("anonymous manifest GET", "body mismatch"); + } else if (!resp.content_type || + strcmp(resp.content_type, + "application/vnd.oci.image.manifest.v1+json") != 0) { + report_fail("anonymous manifest GET", "content_type=%s", + resp.content_type ? resp.content_type : "(null)"); + } else if (!resp.docker_content_digest || + strcmp(resp.docker_content_digest, DIGEST) != 0) { + report_fail("anonymous manifest GET", "docker_digest=%s", + resp.docker_content_digest ? resp.docker_content_digest + : "(null)"); + } else { + report_pass("anonymous manifest GET"); + } + oci_fetch_response_free(&resp); +} + +static void test_manifest_404(mock_server_t *server, oci_fetcher_t *f) +{ + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/library/missing/manifests/v9", + .body = "{}", + .body_len = 2, + .content_type = "application/json", + .docker_digest = NULL, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/nope", + .tag = "v0", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc == 0) { + report_fail("manifest 404 surfaces as error", "rc=0"); + } else if (resp.http_status != 404) { + report_fail("manifest 404 surfaces as error", "status=%ld", + resp.http_status); + } else { + report_pass("manifest 404 surfaces as error"); + } + oci_fetch_response_free(&resp); +} + +static void test_bearer_challenge(mock_server_t *server, oci_fetcher_t *f, + handler_bearer_t *ctx) +{ + mock_set_handler(server, h_bearer_flow, ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "private/secret", + .tag = "v1", + }; + oci_fetch_response_t resp = {0}; + const 
char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("bearer challenge fetches token and retries", "rc=%d err=%s", + rc, err ? err : "(none)"); + } else if (resp.http_status != 200 || + resp.body_len != ctx->manifest_body_len || + memcmp(resp.body, ctx->manifest_body, resp.body_len) != 0) { + report_fail("bearer challenge fetches token and retries", + "status=%ld body_len=%zu", resp.http_status, resp.body_len); + } else if (server->n_requests != 3) { + report_fail("bearer challenge fetches token and retries", + "expected 3 requests, got %d", server->n_requests); + } else if (strncmp(server->log[1].path, "/token", 6) != 0) { + report_fail("bearer challenge fetches token and retries", + "second request was %s, not /token", server->log[1].path); + } else if (strcmp(server->log[2].authorization, + "Bearer testtoken123") != 0) { + report_fail("bearer challenge fetches token and retries", + "retry Authorization=%s", server->log[2].authorization); + } else { + report_pass("bearer challenge fetches token and retries"); + } + oci_fetch_response_free(&resp); +} + +static void test_token_reuse(mock_server_t *server, oci_fetcher_t *f) +{ + /* Second fetch on the same fetcher after a successful bearer flow should + * attach the cached token straight away and skip the 401 dance. The mock + * keeps the same handler from the bearer test in the parent, so a single + * 200 response is expected. + */ + int before = server->n_requests; + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "private/secret", + .tag = "v1", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("cached token reused on subsequent fetch", "rc=%d err=%s", + rc, err ? 
err : "(none)"); + } else if (server->n_requests - before != 1) { + report_fail("cached token reused on subsequent fetch", + "expected 1 extra request, got %d", + server->n_requests - before); + } else if (strcmp(server->log[before].authorization, + "Bearer testtoken123") != 0) { + report_fail("cached token reused on subsequent fetch", + "Authorization=%s", server->log[before].authorization); + } else { + report_pass("cached token reused on subsequent fetch"); + } + oci_fetch_response_free(&resp); +} + +static const char HELLO_WORLD[] = "hello world"; +static const char HELLO_WORLD_SHA256[] = + "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"; + +static void test_blob_success(mock_server_t *server, oci_fetcher_t *f, + const char *store_root) +{ + oci_blob_store_t *store = oci_blob_store_open(store_root); + if (!store) { + report_fail("blob fetch success commits to store", + "store open: %s", strerror(errno)); + return; + } + + handler_blob_t ctx = { + .blob_path = "/v2/library/alpine/blobs/sha256:b94d27b9934d3e08a52e52d7" + "da7dabfac484efe37a5380ee9088f7ace2efcde9", + .body = HELLO_WORLD, + .body_len = strlen(HELLO_WORLD), + }; + mock_set_handler(server, h_blob, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + char digest_str[128]; + oci_descriptor_t desc; + fill_descriptor(&desc, digest_str, sizeof(digest_str), OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256, (int64_t) strlen(HELLO_WORLD), + OCI_MT_LAYER_OCI_TAR_GZIP); + + const char *err = NULL; + int rc = oci_fetch_blob(f, &ref, &desc, store, &err); + if (rc != 0) { + report_fail("blob fetch success commits to store", "rc=%d err=%s", rc, + err ? 
err : "(none)"); + } else if (!oci_blob_store_has(store, OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256)) { + report_fail("blob fetch success commits to store", + "blob not present after commit"); + } else { + report_pass("blob fetch success commits to store"); + } + oci_blob_store_close(store); +} + +static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, + const char *store_root) +{ + oci_blob_store_t *store = oci_blob_store_open(store_root); + if (!store) { + report_fail("blob fetch skips network when already cached", "store"); + return; + } + /* Pre-populate via put_bytes so the fetch hits the store has() short + * circuit. + */ + if (oci_blob_store_put_bytes(store, OCI_DIGEST_SHA256, HELLO_WORLD_SHA256, + HELLO_WORLD, strlen(HELLO_WORLD)) != 0) { + report_fail("blob fetch skips network when already cached", + "put_bytes: %s", strerror(errno)); + oci_blob_store_close(store); + return; + } + + /* Install a handler that would 404 every request, so any contact is a bug. */ + handler_blob_t ctx = { + .blob_path = "/never-called", + .body = "x", + .body_len = 1, + }; + mock_set_handler(server, h_blob, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + char digest_str[128]; + oci_descriptor_t desc; + fill_descriptor(&desc, digest_str, sizeof(digest_str), OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256, (int64_t) strlen(HELLO_WORLD), + OCI_MT_LAYER_OCI_TAR_GZIP); + + const char *err = NULL; + int rc = oci_fetch_blob(f, &ref, &desc, store, &err); + if (rc != 0) { + report_fail("blob fetch skips network when already cached", "rc=%d", rc); + } else if (server->n_requests != 0) { + report_fail("blob fetch skips network when already cached", + "%d unexpected request(s)", server->n_requests); + } else { + report_pass("blob fetch skips network when already cached"); + } + oci_blob_store_close(store); +} + +static void test_blob_size_mismatch(mock_server_t *server, oci_fetcher_t *f, + const char 
*store_root) +{ + oci_blob_store_t *store = oci_blob_store_open(store_root); + if (!store) { + report_fail("blob size overflow rejected", "store"); + return; + } + handler_blob_t ctx = { + .blob_path = "/v2/library/alpine/blobs/sha256:b94d27b9934d3e08a52e52d7" + "da7dabfac484efe37a5380ee9088f7ace2efcde9", + .body = HELLO_WORLD, + .body_len = strlen(HELLO_WORLD), + .oversize = true, + }; + mock_set_handler(server, h_blob, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + char digest_str[128]; + oci_descriptor_t desc; + fill_descriptor(&desc, digest_str, sizeof(digest_str), OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256, (int64_t) strlen(HELLO_WORLD), + OCI_MT_LAYER_OCI_TAR_GZIP); + + const char *err = NULL; + int rc = oci_fetch_blob(f, &ref, &desc, store, &err); + if (rc == 0) { + report_fail("blob size overflow rejected", "rc=0"); + } else if (oci_blob_store_has(store, OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256)) { + report_fail("blob size overflow rejected", "blob visible after failure"); + } else { + report_pass("blob size overflow rejected"); + } + oci_blob_store_close(store); +} + +static void test_blob_digest_mismatch(mock_server_t *server, oci_fetcher_t *f, + const char *store_root) +{ + /* Server returns "hello world" but the descriptor declares a different + * digest hex. Bytes-in matches declared size exactly, so the only + * mismatch is at commit time. 
+ */ + static const char WRONG_HEX[] = + "0000000000000000000000000000000000000000000000000000000000000000"; + oci_blob_store_t *store = oci_blob_store_open(store_root); + if (!store) { + report_fail("blob digest mismatch rejected", "store"); + return; + } + char wrong_path[256]; + snprintf(wrong_path, sizeof(wrong_path), + "/v2/library/alpine/blobs/sha256:%s", WRONG_HEX); + handler_blob_t ctx = { + .blob_path = wrong_path, + .body = HELLO_WORLD, + .body_len = strlen(HELLO_WORLD), + }; + mock_set_handler(server, h_blob, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + char digest_str[128]; + oci_descriptor_t desc; + fill_descriptor(&desc, digest_str, sizeof(digest_str), OCI_DIGEST_SHA256, + WRONG_HEX, (int64_t) strlen(HELLO_WORLD), + OCI_MT_LAYER_OCI_TAR_GZIP); + + const char *err = NULL; + int rc = oci_fetch_blob(f, &ref, &desc, store, &err); + if (rc == 0) { + report_fail("blob digest mismatch rejected", "rc=0"); + } else if (oci_blob_store_has(store, OCI_DIGEST_SHA256, WRONG_HEX)) { + report_fail("blob digest mismatch rejected", "blob visible"); + } else { + report_pass("blob digest mismatch rejected"); + } + oci_blob_store_close(store); +} + +static void test_blob_404(mock_server_t *server, oci_fetcher_t *f, + const char *store_root) +{ + oci_blob_store_t *store = oci_blob_store_open(store_root); + handler_blob_t ctx = { + .blob_path = "/never-matches", + .body = "x", + .body_len = 1, + }; + mock_set_handler(server, h_blob, &ctx); + + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + char digest_str[128]; + oci_descriptor_t desc; + fill_descriptor(&desc, digest_str, sizeof(digest_str), OCI_DIGEST_SHA256, + HELLO_WORLD_SHA256, (int64_t) strlen(HELLO_WORLD), + OCI_MT_LAYER_OCI_TAR_GZIP); + + const char *err = NULL; + int rc = oci_fetch_blob(f, &ref, &desc, store, &err); + if (rc == 0) + report_fail("blob 404 rejected", "rc=0"); + else if 
(oci_blob_store_has(store, OCI_DIGEST_SHA256, HELLO_WORLD_SHA256)) + report_fail("blob 404 rejected", "blob visible after 404"); + else + report_pass("blob 404 rejected"); + oci_blob_store_close(store); +} + +/* ── Online smoke (opt-in) ───────────────────────────────────────── */ + +static void test_online_dockerhub(void) +{ + static const char *accept[] = { + "application/vnd.oci.image.index.v1+json", + "application/vnd.docker.distribution.manifest.list.v2+json", + "application/vnd.oci.image.manifest.v1+json", + "application/vnd.docker.distribution.manifest.v2+json", + NULL, + }; + oci_fetcher_t *f = oci_fetcher_new(NULL); + if (!f) { + report_fail("online docker.io alpine:3.20", "fetcher new: %s", + strerror(errno)); + return; + } + oci_ref_t ref = { + .registry = "docker.io", + .repository = "library/alpine", + .tag = "3.20", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, accept, &resp, &err); + if (rc != 0) { + report_fail("online docker.io alpine:3.20", "rc=%d err=%s status=%ld", + rc, err ? 
err : "(none)", resp.http_status); + } else if (resp.http_status != 200 || resp.body_len == 0) { + report_fail("online docker.io alpine:3.20", "status=%ld body_len=%zu", + resp.http_status, resp.body_len); + } else { + report_pass("online docker.io alpine:3.20"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + +/* ── main ────────────────────────────────────────────────────────── */ + +int main(void) +{ + char *scratch = make_scratch_root(); + if (!scratch) { + fprintf(stderr, "mkdtemp failed: %s\n", strerror(errno)); + return 1; + } + mock_server_t server; + if (mock_server_start(&server) != 0) { + fprintf(stderr, "mock server start failed: %s\n", strerror(errno)); + wipe_dir(scratch); + free(scratch); + return 1; + } + char *base_url = make_base_url(server.port); + if (!base_url) { + fprintf(stderr, "oom on base url\n"); + mock_server_stop(&server); + wipe_dir(scratch); + free(scratch); + return 1; + } + + printf("oci_fetch (mock HTTP @ %s)\n", base_url); + + { + oci_fetcher_options_t opts = {.base_url_override = base_url}; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + fprintf(stderr, "oci_fetcher_new failed\n"); + free(base_url); + mock_server_stop(&server); + wipe_dir(scratch); + free(scratch); + return 1; + } + test_anonymous_manifest(&server, f); + test_manifest_404(&server, f); + + /* bearer_ctx must outlive both bearer tests because the server thread + * holds a pointer to it via mock_set_handler. 
+ */ + static const char BEARER_BODY[] = + "{\"schemaVersion\":2,\"secret\":true}"; + handler_bearer_t bearer_ctx = { + .manifest_path = "/v2/private/secret/manifests/v1", + .expected_token = "testtoken123", + .manifest_body = BEARER_BODY, + .manifest_body_len = strlen(BEARER_BODY), + .content_type = "application/vnd.oci.image.manifest.v1+json", + }; + snprintf(bearer_ctx.base_url, sizeof(bearer_ctx.base_url), "%s", + base_url); + test_bearer_challenge(&server, f, &bearer_ctx); + test_token_reuse(&server, f); + oci_fetcher_free(f); + } + + /* Each blob test gets its own store directory so dedup short-circuit and + * abort-leaves-no-leftover assertions are independent. + */ + { + oci_fetcher_options_t opts = {.base_url_override = base_url}; + oci_fetcher_t *f = oci_fetcher_new(&opts); + char dir[512]; + + snprintf(dir, sizeof(dir), "%s/blob-success", scratch); + test_blob_success(&server, f, dir); + + snprintf(dir, sizeof(dir), "%s/blob-cached", scratch); + test_blob_already_cached(&server, f, dir); + + snprintf(dir, sizeof(dir), "%s/blob-oversize", scratch); + test_blob_size_mismatch(&server, f, dir); + + snprintf(dir, sizeof(dir), "%s/blob-digest-bad", scratch); + test_blob_digest_mismatch(&server, f, dir); + + snprintf(dir, sizeof(dir), "%s/blob-404", scratch); + test_blob_404(&server, f, dir); + + oci_fetcher_free(f); + } + + free(base_url); + mock_server_stop(&server); + + if (getenv("OCI_FETCH_ONLINE")) { + printf("oci_fetch (online docker.io)\n"); + test_online_dockerhub(); + } + + wipe_dir(scratch); + free(scratch); + + printf("\nResults: %d/%d passed\n", g_passed, g_total); + return g_passed == g_total ? 0 : 1; +} From c8e1e9785536a067d990077bfbae2d551dad279d Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 16:26:52 +0800 Subject: [PATCH 5/7] Add OCI registry private-registry options (basic auth, custom CA, insecure) Fourth slice of Phase 1 from issue #31, 4b half. 
Closes out the oci-roadmap.md Q7 ship list by extending the slice-4a fetcher with HTTP Basic authentication, custom CA bundle, and a loopback-gated TLS verify-off path. fetch_manifest / fetch_blob signatures are unchanged; everything new lives in oci_fetcher_options_t and a new per-easy-handle helper. src/oci/fetch.h grows four fields on oci_fetcher_options_t: username, password, ca_file, allow_insecure. oci_fetcher_new now stashes username/password as a pre-joined "user:pass" string (CURLOPT_USERPWD takes the joined form), strdup's ca_file, and records allow_insecure verbatim. apply_security_opts() is called from every GET callsite (perform_manifest_get, perform_blob_get, fetch_token) right after curl_easy_reset, and attaches CURLOPT_USERPWD plus CURLAUTH_BASIC, CURLOPT_CAINFO, and CURLOPT_SSL_VERIFY{PEER,HOST}=0 when each is set. This shape gives the token endpoint the basic credentials too: a registry that bridges Basic for the token exchange and Bearer for the data API sees both. libcurl drops the USERPWD-derived Authorization header in favor of the manually appended Authorization: Bearer on the retry, so basic gives way to bearer once a token is in hand. The loopback policy gate runs at the entry of oci_fetch_manifest and oci_fetch_blob, not in oci_fetcher_new: ref is not available at construction time, and policy is about which host the fetcher is actually about to talk to. extract_host_from_registry strips the optional :port (and the [] of bracketed IPv6 literals) from ref->registry, is_loopback_host case-insensitively matches against 127.0.0.1 / localhost / ::1, and check_insecure_policy combines them so a non-loopback target with allow_insecure=true returns -1 with errno=EPERM before a single byte is sent. The policy reads ref->registry rather than the test-only base_url_override so unit tests can drive a non-loopback ref while still pointing the mock URL at 127.0.0.1, and the production surface (no override) gets the same answer it would in deployment. 
tests/test-oci-fetch.c upgrades the in-process mock from plain HTTP to TLS. The mock generates an ephemeral RSA-2048 keypair and a self-signed certificate at startup via OpenSSL EVP, signed for CN=127.0.0.1 with SAN IP:127.0.0.1 + DNS:localhost, valid for one day. The certificate PEM is written into the scratch directory and the fetcher receives the path through opts.ca_file. The accept loop wraps each connection in SSL_accept; reads and writes go through a small io_t abstraction so handler signatures change only in the IO parameter type. mock_send_full keeps the same response shape but writes through SSL_write. libcurl's SSL backend is forced to OpenSSL (LibreSSL on macOS) via curl_global_sslset() called before any other libcurl entry. macOS system libcurl is a multi-SSL build that defaults to Secure Transport, and Secure Transport ignores CURLOPT_CAINFO. Without this pin the ca_file negative cases would pass for the wrong reason: the handshake would be verified against the keychain, not the supplied PEM. LibreSSL on macOS still finds the system trust roots for the OCI_FETCH_ONLINE=1 case, so the online docker.io smoke test continues to work. mk/toolchain.mk auto-detects OPENSSL_PREFIX from /opt/homebrew/opt/openssl@3 (Apple Silicon) or /usr/local/opt/openssl@3 (Intel) and exposes OPENSSL_CFLAGS / OPENSSL_LDFLAGS. The Makefile attaches them only to build/test-oci-fetch (target-specific CFLAGS plus link flags), so the production elfuse binary still has no OpenSSL dependency: the new TLS plumbing is testing scaffolding, not runtime code. Test count grows from 9 to 15 cases. 
New cases: basic auth success (verifies the server saw "Basic YWxpY2U6c2VjcmV0" exactly once); basic auth carried into the token endpoint (verifies the token GET saw the same basic credentials and the manifest retry switched to Bearer); insecure on a loopback registry is allowed (HTTPS request goes through despite no ca_file); insecure on a non-loopback registry is rejected with errno=EPERM and zero bytes leak to the mock server (request log stays empty); ca_file unset against the self-signed mock fails the handshake with http_status=0; ca_file pointing at an unrelated self-signed certificate also fails the handshake. The 9 existing cases continue to pass over TLS by supplying the mock's CA PEM as ca_file. make check stays green: 78 unit tests, busybox 81/0/3, proctitle, procfs-exec, timeout-disable, OCI-ref 34/34, OCI-digest 25/25, OCI-blob-store 14/14, OCI-manifest 76/76, OCI-fetch 15/15. make test-oci-fetch-online (opt-in) also passes. --- Makefile | 7 +- mk/toolchain.mk | 22 ++ src/oci/fetch.c | 152 +++++++++ src/oci/fetch.h | 27 +- tests/test-oci-fetch.c | 726 ++++++++++++++++++++++++++++++++++++----- 5 files changed, 852 insertions(+), 82 deletions(-) diff --git a/Makefile b/Makefile index b22f494..2caee40 100644 --- a/Makefile +++ b/Makefile @@ -170,10 +170,13 @@ $(BUILD_DIR)/test-oci-manifest: $(BUILD_DIR)/test-oci-manifest.o $(BUILD_DIR)/oc ## Build the OCI fetch (libcurl) unit test (native macOS, no HVF). Pulls in ## blob-store + digest + manifest models + cJSON; links against system libcurl -## and the platform pthread runtime for the in-process mock HTTP server. +## and the platform pthread runtime for the in-process mock HTTP server. The +## test mock terminates TLS using libssl from brew openssl@3 so the ca_file +## negative cases exercise a real certificate verification path. 
+$(BUILD_DIR)/test-oci-fetch.o: CFLAGS += $(OPENSSL_CFLAGS) $(BUILD_DIR)/test-oci-fetch: $(BUILD_DIR)/test-oci-fetch.o $(BUILD_DIR)/oci/fetch.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) @echo " LD $@" - $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread + $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread $(OPENSSL_LDFLAGS) # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. diff --git a/mk/toolchain.mk b/mk/toolchain.mk index e0f6be4..ec00aa9 100644 --- a/mk/toolchain.mk +++ b/mk/toolchain.mk @@ -42,3 +42,25 @@ SHIM_ASFLAGS ?= -arch arm64 # clang-format CLANG_FORMAT ?= clang-format + +# OpenSSL (Homebrew) for the OCI fetch test scaffolding. The mock HTTP server +# uses libssl/libcrypto to terminate TLS with a self-signed certificate so the +# ca_file negative cases exercise a real handshake. macOS ships LibreSSL +# headers in a private framework and does not publish a usable include path +# under /usr; brew openssl@3 is the documented public location. +ifeq ($(origin OPENSSL_PREFIX),undefined) + ifneq ($(wildcard /opt/homebrew/opt/openssl@3/include/openssl/ssl.h),) + OPENSSL_PREFIX := /opt/homebrew/opt/openssl@3 + else ifneq ($(wildcard /usr/local/opt/openssl@3/include/openssl/ssl.h),) + OPENSSL_PREFIX := /usr/local/opt/openssl@3 + else + OPENSSL_PREFIX := + endif +endif +ifneq ($(OPENSSL_PREFIX),) + OPENSSL_CFLAGS := -I$(OPENSSL_PREFIX)/include + OPENSSL_LDFLAGS := -L$(OPENSSL_PREFIX)/lib -lssl -lcrypto +else + OPENSSL_CFLAGS := + OPENSSL_LDFLAGS := -lssl -lcrypto +endif diff --git a/src/oci/fetch.c b/src/oci/fetch.c index 07cd2be..c87b413 100644 --- a/src/oci/fetch.c +++ b/src/oci/fetch.c @@ -51,6 +51,18 @@ struct oci_fetcher { char *base_url_override; char *bearer_token; bearer_challenge_t challenge; + /* Pre-built "user:pass" string for CURLOPT_USERPWD. 
NULL when basic auth + * is disabled. The fetcher attaches it to every easy-handle reset (manifest + * GET, blob GET, token GET) so a registry that bridges basic and bearer + * sees the basic credentials on both the manifest probe and the token + * exchange. + */ + char *user_pass; + /* PEM bundle path passed through to CURLOPT_CAINFO. NULL leaves libcurl on + * its compiled-in trust store. + */ + char *ca_file; + bool allow_insecure; }; static pthread_once_t g_curl_init_once = PTHREAD_ONCE_INIT; @@ -88,6 +100,23 @@ static void bearer_challenge_free(bearer_challenge_t *c) c->scope = NULL; } +static char *build_user_pass(const char *user, const char *pass) +{ + if (!user) + return NULL; + size_t ul = strlen(user); + size_t pl = pass ? strlen(pass) : 0; + char *out = malloc(ul + 1 + pl + 1); + if (!out) + return NULL; + memcpy(out, user, ul); + out[ul] = ':'; + if (pl) + memcpy(out + ul + 1, pass, pl); + out[ul + 1 + pl] = '\0'; + return out; +} + oci_fetcher_t *oci_fetcher_new(const oci_fetcher_options_t *opts) { if (oci_fetch_global_init() < 0) @@ -112,6 +141,29 @@ oci_fetcher_t *oci_fetcher_new(const oci_fetcher_options_t *opts) return NULL; } } + if (opts && opts->username) { + f->user_pass = build_user_pass(opts->username, opts->password); + if (!f->user_pass) { + curl_easy_cleanup(f->easy); + free(f->base_url_override); + free(f); + errno = ENOMEM; + return NULL; + } + } + if (opts && opts->ca_file) { + f->ca_file = strdup(opts->ca_file); + if (!f->ca_file) { + curl_easy_cleanup(f->easy); + free(f->base_url_override); + free(f->user_pass); + free(f); + errno = ENOMEM; + return NULL; + } + } + if (opts) + f->allow_insecure = opts->allow_insecure; return f; } @@ -124,6 +176,8 @@ void oci_fetcher_free(oci_fetcher_t *f) free(f->base_url_override); free(f->bearer_token); bearer_challenge_free(&f->challenge); + free(f->user_pass); + free(f->ca_file); free(f); } @@ -141,6 +195,97 @@ void oci_fetch_response_free(oci_fetch_response_t *r) r->http_status = 0; } +/* Strip 
the [bracketed] form of an IPv6 literal and any trailing :port from a + * registry-shaped string ("127.0.0.1:fake", "ghcr.io", "[::1]:5000", + * "registry.example.com"). Writes the bare host into out and returns true on + * success; returns false when out is too small to fit the result. + * + * Bracketed IPv6 forms have a colon inside the address, so port-stripping + * keys off the closing ']'; for non-bracketed registries the rightmost ':' + * is the port delimiter. + */ +static bool extract_host_from_registry(const char *reg, char *out, size_t cap) +{ + if (!reg || !out || cap == 0) + return false; + if (reg[0] == '[') { + const char *close = strchr(reg, ']'); + if (!close) + return false; + size_t n = (size_t) (close - reg - 1); + if (n + 1 > cap) + return false; + memcpy(out, reg + 1, n); + out[n] = '\0'; + return true; + } + const char *colon = strrchr(reg, ':'); + size_t n = colon ? (size_t) (colon - reg) : strlen(reg); + if (n + 1 > cap) + return false; + memcpy(out, reg, n); + out[n] = '\0'; + return true; +} + +static bool is_loopback_host(const char *host) +{ + if (!host) + return false; + if (!strcasecmp(host, "127.0.0.1")) + return true; + if (!strcasecmp(host, "localhost")) + return true; + if (!strcasecmp(host, "::1")) + return true; + return false; +} + +/* Reject allow_insecure when the registry host is not on the loopback + * whitelist. Honors ref->registry as the authoritative target even when a + * test passes base_url_override, so that policy reflects the production + * surface ("which host am I pulling from?") rather than where the bytes + * happen to flow during a unit test. 
+ */ +static int check_insecure_policy(const oci_fetcher_t *f, const oci_ref_t *ref, + const char **err_msg) +{ + if (!f->allow_insecure) + return 0; + char host[256]; + if (!extract_host_from_registry(ref->registry, host, sizeof(host))) { + if (err_msg) + *err_msg = "registry host is malformed"; + errno = EINVAL; + return -1; + } + if (!is_loopback_host(host)) { + if (err_msg) + *err_msg = "allow_insecure is restricted to loopback registries"; + errno = EPERM; + return -1; + } + return 0; +} + +/* Apply the per-fetcher security options to the easy handle in its post-reset + * state. Called from every GET path (manifest, blob, token) after + * curl_easy_reset so the option set survives the reset. + */ +static void apply_security_opts(CURL *easy, const oci_fetcher_t *f) +{ + if (f->user_pass) { + curl_easy_setopt(easy, CURLOPT_USERPWD, f->user_pass); + curl_easy_setopt(easy, CURLOPT_HTTPAUTH, (long) CURLAUTH_BASIC); + } + if (f->ca_file) + curl_easy_setopt(easy, CURLOPT_CAINFO, f->ca_file); + if (f->allow_insecure) { + curl_easy_setopt(easy, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt(easy, CURLOPT_SSL_VERIFYHOST, 0L); + } +} + /* docker.io is the canonical registry name from the reference parser; the * actual API host is registry-1.docker.io. Every other registry (ghcr.io, * quay.io, public.ecr.aws, mirrors) uses its own host directly. 
@@ -470,6 +615,7 @@ static int fetch_token(oci_fetcher_t *f, const char **err_msg) body_buf_t body = {.max = FETCH_BODY_MAX}; headers_ctx_t hctx = {0}; curl_easy_reset(f->easy); + apply_security_opts(f->easy, f); curl_easy_setopt(f->easy, CURLOPT_URL, url); curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); @@ -551,6 +697,7 @@ static int perform_manifest_get(oci_fetcher_t *f, bearer_challenge_free(challenge_out); curl_easy_reset(f->easy); + apply_security_opts(f->easy, f); curl_easy_setopt(f->easy, CURLOPT_URL, url); curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); @@ -609,6 +756,8 @@ int oci_fetch_manifest(oci_fetcher_t *f, return -1; } memset(out, 0, sizeof(*out)); + if (check_insecure_policy(f, ref, err_msg) < 0) + return -1; const char *selector = digest_or_tag; if (!selector) selector = ref->digest; @@ -707,6 +856,7 @@ static int perform_blob_get(oci_fetcher_t *f, bearer_challenge_free(challenge_out); curl_easy_reset(f->easy); + apply_security_opts(f->easy, f); curl_easy_setopt(f->easy, CURLOPT_URL, url); curl_easy_setopt(f->easy, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(f->easy, CURLOPT_MAXREDIRS, 5L); @@ -761,6 +911,8 @@ int oci_fetch_blob(oci_fetcher_t *f, errno = EINVAL; return -1; } + if (check_insecure_policy(f, ref, err_msg) < 0) + return -1; if (desc->size < 0) { if (err_msg) *err_msg = "descriptor size is negative"; diff --git a/src/oci/fetch.h b/src/oci/fetch.h index a802abe..e09ccb5 100644 --- a/src/oci/fetch.h +++ b/src/oci/fetch.h @@ -36,12 +36,31 @@ typedef struct { /* Optional override of the registry base URL. When non-NULL, the fetcher * uses this prefix for every /v2/... request instead of computing one * from ref->registry. Test scaffolding sets this to a local mock - * (http://127.0.0.1:); production callers leave it NULL. - * - * Reserved for slice 4b: username, password, ca_file, allow_insecure. 
- * Treat any unset future field as NULL/false. + * (https://127.0.0.1:); production callers leave it NULL. */ const char *base_url_override; + + /* HTTP Basic authentication. When username is non-NULL, libcurl produces + * Authorization: Basic on every request the fetcher + * issues, including the token endpoint when the registry also requires a + * Bearer flow. password may be NULL for an empty secret. + */ + const char *username; + const char *password; + + /* Path to a PEM-encoded CA bundle. When non-NULL the fetcher passes it to + * libcurl as CURLOPT_CAINFO, replacing the system trust store for that + * connection. Effective only with an OpenSSL-style SSL backend (the + * default macOS Secure Transport backend ignores CAINFO). + */ + const char *ca_file; + + /* Disable TLS verification. Honored only when the resolved registry host + * is on the loopback whitelist (127.0.0.1, localhost, ::1). Any other + * host with allow_insecure=true causes oci_fetch_manifest / + * oci_fetch_blob to fail with errno=EPERM before a single byte is sent. + */ + bool allow_insecure; } oci_fetcher_options_t; typedef struct oci_fetcher oci_fetcher_t; diff --git a/tests/test-oci-fetch.c b/tests/test-oci-fetch.c index eca4968..61edbec 100644 --- a/tests/test-oci-fetch.c +++ b/tests/test-oci-fetch.c @@ -3,17 +3,25 @@ * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 * - * Spawns a single-threaded HTTP/1.1 mock server on 127.0.0.1: and - * drives oci_fetch_manifest / oci_fetch_blob against it. The mock server is - * scripted per request via a handler function pointer: each test installs the - * behavior it wants (200 OK, 401 with bearer challenge, 404, oversize blob, - * etc.) and verifies the response captured by the fetcher plus side effects - * in a temporary blob store directory. + * Spawns a TLS-terminated HTTP/1.1 mock server on 127.0.0.1: backed + * by a fresh self-signed RSA certificate generated at startup. 
The certificate + * is written to a scratch CA PEM that the fetcher receives via opts.ca_file; + * negative cases drop the option to force a trust failure. Each test installs + * a handler that scripts the desired response (200, 401 with Bearer challenge, + * 401 demanding Basic auth, 404, oversize blob, digest mismatch, ...) and the + * test verifies fetcher response state plus blob store side effects. * - * No real network is touched. The optional OCI_FETCH_ONLINE=1 environment - * variable enables a single additional case that pulls alpine:3.20 from - * Docker Hub anonymously; that path is gated behind make test-oci-fetch-online - * and is not part of make check. + * libcurl's SSL backend is forced to OpenSSL (LibreSSL on macOS) via + * curl_global_sslset() before init. The macOS system libcurl ships as a + * multi-SSL build and ignores CURLOPT_CAINFO under its default Secure + * Transport backend, which would defeat the ca_file negative cases. The + * OpenSSL backend honours CAINFO and gives consistent behaviour across macOS + * and Linux. + * + * OCI_FETCH_ONLINE=1 enables one extra case that pulls alpine:3.20 from + * Docker Hub anonymously. It shares the LibreSSL backend selected here and + * relies on its default trust roots; it is gated behind + * make test-oci-fetch-online and is not part of make check. */ #include @@ -31,6 +39,14 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + #include "oci/blob-store.h" #include "oci/digest.h" #include "oci/fetch.h" @@ -68,6 +84,32 @@ static void report_fail(const char *name, const char *fmt, ...) printf("\n"); } +/* IO abstraction: every handler reads and writes through an io_t so the + * underlying transport (an SSL session here) is swappable. + */ +typedef struct { + SSL *ssl; +} io_t; + +static ssize_t io_read(io_t *io, void *buf, size_t cap) +{ + int n = SSL_read(io->ssl, buf, (int) cap); + return n > 0 ? 
(ssize_t) n : -1; +} + +static void io_write(io_t *io, const void *buf, size_t n) +{ + const char *p = buf; + size_t left = n; + while (left) { + int w = SSL_write(io->ssl, p, (int) left); + if (w <= 0) + return; + p += w; + left -= (size_t) w; + } +} + /* ── Mock HTTP server ────────────────────────────────────────────── */ typedef struct { @@ -80,7 +122,7 @@ typedef struct { #define MOCK_LOG_MAX 16 typedef struct mock_server mock_server_t; -typedef void (*mock_handler_t)(mock_server_t *s, int fd, +typedef void (*mock_handler_t)(mock_server_t *s, io_t *io, const mock_request_t *req); struct mock_server { @@ -93,13 +135,15 @@ struct mock_server { mock_request_t log[MOCK_LOG_MAX]; mock_handler_t handler; void *ctx; + SSL_CTX *ssl_ctx; + char ca_pem_path[256]; }; -static ssize_t read_all_until_empty(int fd, char *buf, size_t cap) +static ssize_t read_request_until_empty(io_t *io, char *buf, size_t cap) { size_t off = 0; while (off + 1 < cap) { - ssize_t n = read(fd, buf + off, cap - 1 - off); + ssize_t n = io_read(io, buf + off, cap - 1 - off); if (n <= 0) break; off += (size_t) n; @@ -113,7 +157,6 @@ static ssize_t read_all_until_empty(int fd, char *buf, size_t cap) static void parse_request(const char *raw, mock_request_t *out) { memset(out, 0, sizeof(*out)); - /* Request line: METHOD SP path SP HTTP/x */ const char *sp1 = strchr(raw, ' '); if (!sp1) return; @@ -129,7 +172,6 @@ static void parse_request(const char *raw, mock_request_t *out) plen = sizeof(out->path) - 1; memcpy(out->path, sp1 + 1, plen); - /* Header scan. */ const char *line = strstr(raw, "\r\n"); if (!line) return; @@ -177,9 +219,27 @@ static void *mock_server_loop(void *arg) continue; break; } + SSL *ssl = SSL_new(s->ssl_ctx); + if (!ssl) { + close(cfd); + continue; + } + SSL_set_fd(ssl, cfd); + if (SSL_accept(ssl) <= 0) { + /* Negative-trust tests deliberately abort the handshake; just + * recycle the socket and let the request log stay empty so the + * caller can assert n_requests == 0. 
+ */ + SSL_free(ssl); + close(cfd); + continue; + } + io_t io = {.ssl = ssl}; char buf[8192]; - ssize_t got = read_all_until_empty(cfd, buf, sizeof(buf)); + ssize_t got = read_request_until_empty(&io, buf, sizeof(buf)); if (got <= 0) { + SSL_shutdown(ssl); + SSL_free(ssl); close(cfd); continue; } @@ -194,19 +254,97 @@ static void *mock_server_loop(void *arg) pthread_mutex_unlock(&s->lock); if (h) - h(s, cfd, &req); + h(s, &io, &req); + SSL_shutdown(ssl); + SSL_free(ssl); close(cfd); } return NULL; } -static int mock_server_start(mock_server_t *s) +/* Generate an in-memory RSA keypair + self-signed cert valid for one day, + * covering CN=127.0.0.1 plus SAN IP:127.0.0.1 and DNS:localhost. Writes the + * certificate (PEM) to s->ca_pem_path for the fetcher to consume as + * opts.ca_file. + */ +static int mock_make_cert(mock_server_t *s, const char *scratch_root) +{ + EVP_PKEY *pkey = EVP_RSA_gen(2048); + if (!pkey) + return -1; + X509 *cert = X509_new(); + if (!cert) { + EVP_PKEY_free(pkey); + return -1; + } + X509_set_version(cert, 2); + ASN1_INTEGER_set(X509_get_serialNumber(cert), 1); + X509_gmtime_adj(X509_get_notBefore(cert), 0); + X509_gmtime_adj(X509_get_notAfter(cert), 60 * 60 * 24); + X509_set_pubkey(cert, pkey); + X509_NAME *name = X509_get_subject_name(cert); + X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, + (const unsigned char *) "127.0.0.1", -1, -1, 0); + X509_set_issuer_name(cert, name); + + X509V3_CTX vctx; + X509V3_set_ctx_nodb(&vctx); + X509V3_set_ctx(&vctx, cert, cert, NULL, NULL, 0); + X509_EXTENSION *ext = X509V3_EXT_conf_nid(NULL, &vctx, + NID_subject_alt_name, + "IP:127.0.0.1, DNS:localhost"); + if (ext) { + X509_add_ext(cert, ext, -1); + X509_EXTENSION_free(ext); + } + if (!X509_sign(cert, pkey, EVP_sha256())) { + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + + snprintf(s->ca_pem_path, sizeof(s->ca_pem_path), "%s/mock-ca.pem", + scratch_root); + FILE *fp = fopen(s->ca_pem_path, "w"); + if (!fp) { + X509_free(cert); + 
EVP_PKEY_free(pkey); + return -1; + } + PEM_write_X509(fp, cert); + fclose(fp); + + s->ssl_ctx = SSL_CTX_new(TLS_server_method()); + if (!s->ssl_ctx) { + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + SSL_CTX_set_min_proto_version(s->ssl_ctx, TLS1_2_VERSION); + if (SSL_CTX_use_certificate(s->ssl_ctx, cert) != 1 || + SSL_CTX_use_PrivateKey(s->ssl_ctx, pkey) != 1) { + SSL_CTX_free(s->ssl_ctx); + s->ssl_ctx = NULL; + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + X509_free(cert); + EVP_PKEY_free(pkey); + return 0; +} + +static int mock_server_start(mock_server_t *s, const char *scratch_root) { memset(s, 0, sizeof(*s)); pthread_mutex_init(&s->lock, NULL); + if (mock_make_cert(s, scratch_root) < 0) { + pthread_mutex_destroy(&s->lock); + return -1; + } s->listen_fd = socket(AF_INET, SOCK_STREAM, 0); if (s->listen_fd < 0) - return -1; + goto err; int yes = 1; setsockopt(s->listen_fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); struct sockaddr_in sa = { @@ -214,25 +352,23 @@ static int mock_server_start(mock_server_t *s) .sin_addr.s_addr = htonl(INADDR_LOOPBACK), .sin_port = 0, }; - if (bind(s->listen_fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { - close(s->listen_fd); - return -1; - } + if (bind(s->listen_fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) + goto err_sock; socklen_t slen = sizeof(sa); - if (getsockname(s->listen_fd, (struct sockaddr *) &sa, &slen) < 0) { - close(s->listen_fd); - return -1; - } + if (getsockname(s->listen_fd, (struct sockaddr *) &sa, &slen) < 0) + goto err_sock; s->port = ntohs(sa.sin_port); - if (listen(s->listen_fd, 8) < 0) { - close(s->listen_fd); - return -1; - } - if (pthread_create(&s->thread, NULL, mock_server_loop, s) != 0) { - close(s->listen_fd); - return -1; - } + if (listen(s->listen_fd, 8) < 0) + goto err_sock; + if (pthread_create(&s->thread, NULL, mock_server_loop, s) != 0) + goto err_sock; return 0; +err_sock: + close(s->listen_fd); +err: + SSL_CTX_free(s->ssl_ctx); + pthread_mutex_destroy(&s->lock); + 
return -1; } static void mock_server_stop(mock_server_t *s) @@ -240,7 +376,6 @@ static void mock_server_stop(mock_server_t *s) pthread_mutex_lock(&s->lock); s->stop = true; pthread_mutex_unlock(&s->lock); - /* Unblock the accept by connecting to ourselves. */ int wake = socket(AF_INET, SOCK_STREAM, 0); if (wake >= 0) { struct sockaddr_in sa = { @@ -253,6 +388,7 @@ static void mock_server_stop(mock_server_t *s) } pthread_join(s->thread, NULL); close(s->listen_fd); + SSL_CTX_free(s->ssl_ctx); pthread_mutex_destroy(&s->lock); } @@ -266,7 +402,15 @@ static void mock_set_handler(mock_server_t *s, mock_handler_t h, void *ctx) pthread_mutex_unlock(&s->lock); } -static void mock_send_full(int fd, int status, const char *status_text, +static int mock_request_count(mock_server_t *s) +{ + pthread_mutex_lock(&s->lock); + int n = s->n_requests; + pthread_mutex_unlock(&s->lock); + return n; +} + +static void mock_send_full(io_t *io, int status, const char *status_text, const char *content_type, const char *www_authenticate, const char *docker_digest, @@ -288,9 +432,9 @@ static void mock_send_full(int fd, int status, const char *status_text, n += snprintf(header + n, sizeof(header) - (size_t) n, "Docker-Content-Digest: %s\r\n", docker_digest); n += snprintf(header + n, sizeof(header) - (size_t) n, "\r\n"); - (void) !write(fd, header, (size_t) n); + io_write(io, header, (size_t) n); if (body_len > 0) - (void) !write(fd, body, body_len); + io_write(io, body, body_len); } /* ── Helpers ─────────────────────────────────────────────────────── */ @@ -324,7 +468,7 @@ static char *make_base_url(int port) char *url = malloc(64); if (!url) return NULL; - snprintf(url, 64, "http://127.0.0.1:%d", port); + snprintf(url, 64, "https://127.0.0.1:%d", port); return url; } @@ -353,16 +497,16 @@ typedef struct { const char *docker_digest; } handler_anonymous_manifest_t; -static void h_anonymous_manifest(mock_server_t *s, int fd, +static void h_anonymous_manifest(mock_server_t *s, io_t *io, const 
mock_request_t *req) { handler_anonymous_manifest_t *ctx = s->ctx; if (strcmp(req->path, ctx->manifest_path) == 0) { - mock_send_full(fd, 200, "OK", ctx->content_type, NULL, ctx->docker_digest, + mock_send_full(io, 200, "OK", ctx->content_type, NULL, ctx->docker_digest, ctx->body, ctx->body_len); return; } - mock_send_full(fd, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); } typedef struct { @@ -374,7 +518,7 @@ typedef struct { char base_url[64]; } handler_bearer_t; -static void h_bearer_flow(mock_server_t *s, int fd, const mock_request_t *req) +static void h_bearer_flow(mock_server_t *s, io_t *io, const mock_request_t *req) { handler_bearer_t *ctx = s->ctx; if (strncmp(req->path, "/token", 6) == 0) { @@ -382,7 +526,7 @@ static void h_bearer_flow(mock_server_t *s, int fd, const mock_request_t *req) int n = snprintf(body, sizeof(body), "{\"token\":\"%s\",\"expires_in\":300}", ctx->expected_token); - mock_send_full(fd, 200, "OK", "application/json", NULL, NULL, body, + mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, (size_t) n); return; } @@ -390,7 +534,7 @@ static void h_bearer_flow(mock_server_t *s, int fd, const mock_request_t *req) char want_auth[256]; snprintf(want_auth, sizeof(want_auth), "Bearer %s", ctx->expected_token); if (strcmp(req->authorization, want_auth) == 0) { - mock_send_full(fd, 200, "OK", ctx->content_type, NULL, NULL, + mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, ctx->manifest_body, ctx->manifest_body_len); return; } @@ -399,11 +543,11 @@ static void h_bearer_flow(mock_server_t *s, int fd, const mock_request_t *req) "Bearer realm=\"%s/token\",service=\"reg\"," "scope=\"repository:private/secret:pull\"", ctx->base_url); - mock_send_full(fd, 401, "Unauthorized", "application/json", challenge, + mock_send_full(io, 401, "Unauthorized", "application/json", challenge, NULL, "{}", 2); return; } - mock_send_full(fd, 404, "Not 
Found", "text/plain", NULL, NULL, "nope", 4); + mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); } typedef struct { @@ -414,16 +558,16 @@ typedef struct { bool oversize; /* if true, send body_len + 5 bytes */ } handler_blob_t; -static void h_blob(mock_server_t *s, int fd, const mock_request_t *req) +static void h_blob(mock_server_t *s, io_t *io, const mock_request_t *req) { handler_blob_t *ctx = s->ctx; if (strcmp(req->path, ctx->blob_path) != 0) { - mock_send_full(fd, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); return; } int status = ctx->status ? ctx->status : 200; if (status != 200) { - mock_send_full(fd, status, "Error", "text/plain", NULL, NULL, "err", 3); + mock_send_full(io, status, "Error", "text/plain", NULL, NULL, "err", 3); return; } if (ctx->oversize) { @@ -431,15 +575,89 @@ static void h_blob(mock_server_t *s, int fd, const mock_request_t *req) char *buf = malloc(pad_len); memcpy(buf, ctx->body, ctx->body_len); memset(buf + ctx->body_len, 'X', 5); - mock_send_full(fd, 200, "OK", "application/octet-stream", NULL, NULL, + mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, buf, pad_len); free(buf); return; } - mock_send_full(fd, 200, "OK", "application/octet-stream", NULL, NULL, + mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, + ctx->body, ctx->body_len); +} + +typedef struct { + const char *manifest_path; + const char *expected_authorization; + const char *body; + size_t body_len; + const char *content_type; +} handler_basic_auth_t; + +static void h_basic_auth(mock_server_t *s, io_t *io, + const mock_request_t *req) +{ + handler_basic_auth_t *ctx = s->ctx; + if (strcmp(req->path, ctx->manifest_path) != 0) { + mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + return; + } + if (strcmp(req->authorization, ctx->expected_authorization) != 0) { + mock_send_full(io, 401, 
"Unauthorized", "application/json", + "Basic realm=\"reg\"", NULL, "{}", 2); + return; + } + mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, ctx->body, ctx->body_len); } +typedef struct { + const char *manifest_path; + const char *expected_basic; + const char *expected_token; + const char *manifest_body; + size_t manifest_body_len; + const char *content_type; + char base_url[64]; +} handler_basic_then_bearer_t; + +static void h_basic_then_bearer(mock_server_t *s, io_t *io, + const mock_request_t *req) +{ + handler_basic_then_bearer_t *ctx = s->ctx; + if (strncmp(req->path, "/token", 6) == 0) { + if (strcmp(req->authorization, ctx->expected_basic) != 0) { + mock_send_full(io, 401, "Unauthorized", "application/json", NULL, + NULL, "{}", 2); + return; + } + char body[256]; + int n = snprintf(body, sizeof(body), + "{\"token\":\"%s\",\"expires_in\":300}", + ctx->expected_token); + mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, + (size_t) n); + return; + } + if (strcmp(req->path, ctx->manifest_path) == 0) { + char want_bearer[256]; + snprintf(want_bearer, sizeof(want_bearer), "Bearer %s", + ctx->expected_token); + if (strcmp(req->authorization, want_bearer) == 0) { + mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, + ctx->manifest_body, ctx->manifest_body_len); + return; + } + char challenge[512]; + snprintf(challenge, sizeof(challenge), + "Bearer realm=\"%s/token\",service=\"reg\"," + "scope=\"repository:private/secret:pull\"", + ctx->base_url); + mock_send_full(io, 401, "Unauthorized", "application/json", challenge, + NULL, "{}", 2); + return; + } + mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); +} + /* ── Tests ───────────────────────────────────────────────────────── */ static void test_anonymous_manifest(mock_server_t *server, oci_fetcher_t *f) @@ -557,11 +775,6 @@ static void test_bearer_challenge(mock_server_t *server, oci_fetcher_t *f, static void test_token_reuse(mock_server_t *server, 
oci_fetcher_t *f) { - /* Second fetch on the same fetcher after a successful bearer flow should - * attach the cached token straight away and skip the 401 dance. The mock - * keeps the same handler from the bearer test in the parent, so a single - * 200 response is expected. - */ int before = server->n_requests; oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -644,9 +857,6 @@ static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, report_fail("blob fetch skips network when already cached", "store"); return; } - /* Pre-populate via put_bytes so the fetch hits the store has() short - * circuit. - */ if (oci_blob_store_put_bytes(store, OCI_DIGEST_SHA256, HELLO_WORLD_SHA256, HELLO_WORLD, strlen(HELLO_WORLD)) != 0) { report_fail("blob fetch skips network when already cached", @@ -655,7 +865,6 @@ static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, return; } - /* Install a handler that would 404 every request, so any contact is a bug. */ handler_blob_t ctx = { .blob_path = "/never-called", .body = "x", @@ -731,10 +940,6 @@ static void test_blob_size_mismatch(mock_server_t *server, oci_fetcher_t *f, static void test_blob_digest_mismatch(mock_server_t *server, oci_fetcher_t *f, const char *store_root) { - /* Server returns "hello world" but the descriptor declares a different - * digest hex. Bytes-in matches declared size exactly, so the only - * mismatch is at commit time. - */ static const char WRONG_HEX[] = "0000000000000000000000000000000000000000000000000000000000000000"; oci_blob_store_t *store = oci_blob_store_open(store_root); @@ -808,6 +1013,347 @@ static void test_blob_404(mock_server_t *server, oci_fetcher_t *f, oci_blob_store_close(store); } +/* ── Slice 4b cases ──────────────────────────────────────────────── */ + +static void test_basic_auth_success(mock_server_t *server, const char *base_url, + const char *ca_pem) +{ + /* alice:secret encoded as base64. 
*/ + handler_basic_auth_t ctx = { + .manifest_path = "/v2/private/area/manifests/v1", + .expected_authorization = "Basic YWxpY2U6c2VjcmV0", + .body = "{\"schemaVersion\":2}", + .body_len = strlen("{\"schemaVersion\":2}"), + .content_type = "application/vnd.oci.image.manifest.v1+json", + }; + mock_set_handler(server, h_basic_auth, &ctx); + + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = ca_pem, + .username = "alice", + .password = "secret", + }; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("basic auth: server accepts credentials", "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "private/area", + .tag = "v1", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("basic auth: server accepts credentials", "rc=%d err=%s", + rc, err ? err : "(none)"); + } else if (resp.http_status != 200) { + report_fail("basic auth: server accepts credentials", "status=%ld", + resp.http_status); + } else if (mock_request_count(server) != 1) { + report_fail("basic auth: server accepts credentials", + "expected 1 request, got %d", mock_request_count(server)); + } else if (strcmp(server->log[0].authorization, + "Basic YWxpY2U6c2VjcmV0") != 0) { + report_fail("basic auth: server accepts credentials", + "Authorization=%s", server->log[0].authorization); + } else { + report_pass("basic auth: server accepts credentials"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + +static void test_basic_then_bearer(mock_server_t *server, const char *base_url, + const char *ca_pem) +{ + static const char BODY[] = "{\"schemaVersion\":2,\"mixed\":true}"; + handler_basic_then_bearer_t ctx = { + .manifest_path = "/v2/private/secret/manifests/v1", + .expected_basic = "Basic Ym9iOmh1bnRlcjI=", + .expected_token = "mixedtoken456", + .manifest_body = BODY, + .manifest_body_len = 
strlen(BODY), + .content_type = "application/vnd.oci.image.manifest.v1+json", + }; + snprintf(ctx.base_url, sizeof(ctx.base_url), "%s", base_url); + mock_set_handler(server, h_basic_then_bearer, &ctx); + + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = ca_pem, + .username = "bob", + .password = "hunter2", + }; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("basic auth carried into bearer token endpoint", + "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "private/secret", + .tag = "v1", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("basic auth carried into bearer token endpoint", + "rc=%d err=%s", rc, err ? err : "(none)"); + } else if (resp.http_status != 200 || + resp.body_len != strlen(BODY) || + memcmp(resp.body, BODY, resp.body_len) != 0) { + report_fail("basic auth carried into bearer token endpoint", + "status=%ld body_len=%zu", resp.http_status, resp.body_len); + } else if (server->n_requests != 3) { + report_fail("basic auth carried into bearer token endpoint", + "expected 3 requests, got %d", server->n_requests); + } else if (strncmp(server->log[1].path, "/token", 6) != 0) { + report_fail("basic auth carried into bearer token endpoint", + "second request path=%s", server->log[1].path); + } else if (strcmp(server->log[1].authorization, + "Basic Ym9iOmh1bnRlcjI=") != 0) { + report_fail("basic auth carried into bearer token endpoint", + "token endpoint Authorization=%s", + server->log[1].authorization); + } else if (strcmp(server->log[2].authorization, + "Bearer mixedtoken456") != 0) { + report_fail("basic auth carried into bearer token endpoint", + "retry Authorization=%s", server->log[2].authorization); + } else { + report_pass("basic auth carried into bearer token endpoint"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + 
+static void test_insecure_loopback_allowed(mock_server_t *server, + const char *base_url) +{ + static const char BODY[] = "{\"schemaVersion\":2}"; + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/library/alpine/manifests/3.20", + .body = BODY, + .body_len = strlen(BODY), + .content_type = "application/vnd.oci.image.manifest.v1+json", + .docker_digest = NULL, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + /* No ca_file: verification is suppressed via allow_insecure. The loopback + * registry host (127.0.0.1) is on the whitelist so policy lets the request + * through. + */ + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .allow_insecure = true, + }; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("insecure: loopback host bypasses TLS verify", + "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "127.0.0.1:5000", + .repository = "library/alpine", + .tag = "3.20", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc != 0) { + report_fail("insecure: loopback host bypasses TLS verify", + "rc=%d err=%s", rc, err ? err : "(none)"); + } else if (resp.http_status != 200) { + report_fail("insecure: loopback host bypasses TLS verify", + "status=%ld", resp.http_status); + } else if (mock_request_count(server) != 1) { + report_fail("insecure: loopback host bypasses TLS verify", + "expected 1 request, got %d", mock_request_count(server)); + } else { + report_pass("insecure: loopback host bypasses TLS verify"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + +static void test_insecure_non_loopback_rejected(mock_server_t *server, + const char *base_url, + const char *ca_pem) +{ + /* Install a handler that would respond 200 if reached, so a leak is + * loud. The policy must block the request before any byte goes out and + * leave the request log empty. 
+ */ + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/evil/path/manifests/v1", + .body = "{}", + .body_len = 2, + .content_type = "application/json", + .docker_digest = NULL, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = ca_pem, + .allow_insecure = true, + }; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("insecure: non-loopback host rejected", "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "evil.example.com", + .repository = "evil/path", + .tag = "v1", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + errno = 0; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + int saved_errno = errno; + if (rc != -1) { + report_fail("insecure: non-loopback host rejected", "rc=%d", rc); + } else if (saved_errno != EPERM) { + report_fail("insecure: non-loopback host rejected", "errno=%d (%s)", + saved_errno, strerror(saved_errno)); + } else if (mock_request_count(server) != 0) { + report_fail("insecure: non-loopback host rejected", + "%d request(s) leaked to server", mock_request_count(server)); + } else { + report_pass("insecure: non-loopback host rejected"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + +static void test_ca_file_missing_rejected(mock_server_t *server, + const char *base_url) +{ + /* No ca_file at all: the mock's self-signed certificate cannot be + * verified by LibreSSL's default trust roots, so the TLS handshake must + * fail. Confirms ca_file is the trust pivot. 
+ */ + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/library/alpine/manifests/3.20", + .body = "{}", + .body_len = 2, + .content_type = "application/json", + .docker_digest = NULL, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + oci_fetcher_options_t opts = {.base_url_override = base_url}; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("ca_file unset: TLS verify fails on self-signed mock", + "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc == 0) { + report_fail("ca_file unset: TLS verify fails on self-signed mock", + "rc=0 (verify should have failed)"); + } else if (resp.http_status != 0) { + report_fail("ca_file unset: TLS verify fails on self-signed mock", + "got http_status=%ld; handshake should have aborted", + resp.http_status); + } else { + report_pass("ca_file unset: TLS verify fails on self-signed mock"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); +} + +static void test_ca_file_wrong_rejected(mock_server_t *server, + const char *base_url, + const char *scratch_root) +{ + /* ca_file points at a syntactically valid but different self-signed + * cert. libcurl must reject the mock's certificate because it does not + * chain to the supplied CA, proving ca_file is the trust source rather + * than a no-op. 
+ */ + char wrong_path[300]; + snprintf(wrong_path, sizeof(wrong_path), "%s/wrong-ca.pem", scratch_root); + + EVP_PKEY *pkey = EVP_RSA_gen(2048); + X509 *cert = X509_new(); + X509_set_version(cert, 2); + ASN1_INTEGER_set(X509_get_serialNumber(cert), 42); + X509_gmtime_adj(X509_get_notBefore(cert), 0); + X509_gmtime_adj(X509_get_notAfter(cert), 60 * 60 * 24); + X509_set_pubkey(cert, pkey); + X509_NAME *name = X509_get_subject_name(cert); + X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, + (const unsigned char *) "wrong.example", + -1, -1, 0); + X509_set_issuer_name(cert, name); + X509_sign(cert, pkey, EVP_sha256()); + FILE *fp = fopen(wrong_path, "w"); + if (fp) { + PEM_write_X509(fp, cert); + fclose(fp); + } + X509_free(cert); + EVP_PKEY_free(pkey); + + handler_anonymous_manifest_t ctx = { + .manifest_path = "/v2/library/alpine/manifests/3.20", + .body = "{}", + .body_len = 2, + .content_type = "application/json", + .docker_digest = NULL, + }; + mock_set_handler(server, h_anonymous_manifest, &ctx); + + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = wrong_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&opts); + if (!f) { + report_fail("ca_file wrong: TLS verify fails", "fetcher new"); + return; + } + oci_ref_t ref = { + .registry = "127.0.0.1:fake", + .repository = "library/alpine", + .tag = "3.20", + }; + oci_fetch_response_t resp = {0}; + const char *err = NULL; + int rc = oci_fetch_manifest(f, &ref, NULL, NULL, &resp, &err); + if (rc == 0) { + report_fail("ca_file wrong: TLS verify fails", + "rc=0 (verify should have failed)"); + } else if (resp.http_status != 0) { + report_fail("ca_file wrong: TLS verify fails", + "got http_status=%ld; handshake should have aborted", + resp.http_status); + } else { + report_pass("ca_file wrong: TLS verify fails"); + } + oci_fetch_response_free(&resp); + oci_fetcher_free(f); + unlink(wrong_path); +} + /* ── Online smoke (opt-in) ───────────────────────────────────────── */ static void 
test_online_dockerhub(void) @@ -850,13 +1396,31 @@ static void test_online_dockerhub(void) int main(void) { + /* Force libcurl onto the OpenSSL (LibreSSL on macOS) backend before the + * fetcher's pthread_once runs curl_global_init. macOS Secure Transport + * ignores CURLOPT_CAINFO, which would silently turn ca_file into a no-op + * and let trust-failure cases pass for the wrong reason. Must be called + * before any other libcurl function in the process. + */ + if (curl_global_sslset(CURLSSLBACKEND_OPENSSL, NULL, NULL) != + CURLSSLSET_OK) { + fprintf(stderr, + "libcurl OpenSSL backend not available; ca_file negative cases " + "would be vacuously true\n"); + return 1; + } + + SSL_library_init(); + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + char *scratch = make_scratch_root(); if (!scratch) { fprintf(stderr, "mkdtemp failed: %s\n", strerror(errno)); return 1; } mock_server_t server; - if (mock_server_start(&server) != 0) { + if (mock_server_start(&server, scratch) != 0) { fprintf(stderr, "mock server start failed: %s\n", strerror(errno)); wipe_dir(scratch); free(scratch); @@ -871,10 +1435,13 @@ int main(void) return 1; } - printf("oci_fetch (mock HTTP @ %s)\n", base_url); + printf("oci_fetch (mock HTTPS @ %s, CA=%s)\n", base_url, server.ca_pem_path); { - oci_fetcher_options_t opts = {.base_url_override = base_url}; + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = server.ca_pem_path, + }; oci_fetcher_t *f = oci_fetcher_new(&opts); if (!f) { fprintf(stderr, "oci_fetcher_new failed\n"); @@ -887,9 +1454,6 @@ int main(void) test_anonymous_manifest(&server, f); test_manifest_404(&server, f); - /* bearer_ctx must outlive both bearer tests because the server thread - * holds a pointer to it via mock_set_handler. 
- */ static const char BEARER_BODY[] = "{\"schemaVersion\":2,\"secret\":true}"; handler_bearer_t bearer_ctx = { @@ -906,11 +1470,11 @@ int main(void) oci_fetcher_free(f); } - /* Each blob test gets its own store directory so dedup short-circuit and - * abort-leaves-no-leftover assertions are independent. - */ { - oci_fetcher_options_t opts = {.base_url_override = base_url}; + oci_fetcher_options_t opts = { + .base_url_override = base_url, + .ca_file = server.ca_pem_path, + }; oci_fetcher_t *f = oci_fetcher_new(&opts); char dir[512]; @@ -932,6 +1496,16 @@ int main(void) oci_fetcher_free(f); } + /* Slice 4b cases: each builds its own fetcher so the auth/trust options + * under test are scoped to a single case. + */ + test_basic_auth_success(&server, base_url, server.ca_pem_path); + test_basic_then_bearer(&server, base_url, server.ca_pem_path); + test_insecure_loopback_allowed(&server, base_url); + test_insecure_non_loopback_rejected(&server, base_url, server.ca_pem_path); + test_ca_file_missing_rejected(&server, base_url); + test_ca_file_wrong_rejected(&server, base_url, scratch); + free(base_url); mock_server_stop(&server); From 08a2f4e64274d6b2a4bd205a64283991a8b270ba Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 19:06:21 +0800 Subject: [PATCH 6/7] Add OCI local store and elfuse oci pull pipeline Slice 5a of Phase 1 from issue #31. Wires the slice 4a/4b fetcher and the slice 3 manifest parser into the elfuse oci pull command and persists the resolved blob graph on disk. inspect still renders only the canonical reference; the offline manifest-tree renderer ships in slice 5b. src/oci/store.{c,h} wraps the slice-2 content-addressable blob store with a tag-to-digest pin table. On-disk layout under : blobs// (immutable, from slice 2) tmp/blob---XXXXXX (in-flight staging) refs/// (pin file, one line: :) oci_store_open creates the refs/ subtree, then opens a blob store rooted at the same path so the two layers share one directory. 
oci_store_put_ref refuses digest-only refs (their digest is the pin, no file needed), validates the supplied digest string with oci_digest_parse, mkdir -p's the registry/repository prefix on demand, writes the digest string followed by \n into a tmp file alongside the final path, fsyncs, and renames into place. Rename rather than link because tag pins are mutable: pulling alpine:3.20 today may resolve to a different digest than yesterday and overwriting the pin is the correct semantic. The blob layer keeps its link(2) discipline because content-addressed blobs stay immutable. oci_store_get_ref reads the pin file, strips the trailing newline, validates the digest via oci_digest_parse, and returns a heap-allocated copy. Miss reports errno=ENOENT so callers can distinguish "never pulled" from "io error reading pin". oci_store_default_root returns the platform default: $XDG_DATA_HOME/elfuse/store when set, otherwise $HOME/Library/Application Support/elfuse/store. Phase 2 will mount a sparse case-sensitive APFS volume at the same path (oci-roadmap.md Q1); the API does not change. src/oci/pull.{c,h} implements the pipeline. oci_pull runs six phases linearly: 1. Fetch the top-level manifest by ref->digest or ref->tag, advertising Accept for both OCI and Docker index + manifest types. 2. Hash the body with SHA-256 and cross-check against the Docker-Content-Digest header when the registry sent one. Body / header mismatch is a hostile-registry signal and aborts before anything else writes to the store. When the user pulled by digest, also cross-check the body digest against ref->digest. 3. Persist the manifest body into the blob store under its sha256 digest. 4. If the top-level was an image index, parse it, run oci_index_pick_linux_arm64, fetch the sub-manifest by its descriptor digest with expected-digest verification, persist it, and switch to the sub-manifest body for the next phase. The pin digest stays at the top-level (index) digest so that the next inspect / pull by tag re-walks index then manifest. 5. 
Parse the manifest, fetch the config blob, fetch each layer blob in manifest order via oci_fetch_blob. Each blob fetch short-circuits when oci_blob_store_has reports a hit, so a re-pull issues zero layer downloads (only the two manifest bodies are re-fetched in the index case; manifest caching is its own future slice). 6. Write the tag-to-manifest-digest pin via oci_store_put_ref. Skip for digest-only refs (no tag to pin). Schema v1 manifests and foreign / nondistributable layers are rejected by oci_manifest_parse from slice 3; oci_pull surfaces those diagnostics and aborts before any partial layer hits the store. The errno is preserved across the cleanup goto so callers can key tests off EPROTO / ENOENT / EINVAL rather than whatever value free() happens to leave behind during cleanup. Progress output is one line per descriptor with a truncated digest, size, state (downloaded vs cached), and media-type name. -q / --quiet silences it. The full hex still goes into the pin file and the blob store for verification. src/oci/cli.c grows pull argument parsing: --store DIR, -u | --user USER[:PASS], --insecure-ca PEM, --insecure, -q | --quiet, plus the positional reference. Defaults come from oci_store_default_root. split_userpass handles "user", "user:", and "user:pass" forms with one dynamically-allocated buffer the cleanup path frees. inspect, prune, list keep their slice-1 behaviour for now. tests/lib/oci-mock.{c,h} extracts the TLS-terminated HTTP/1.1 mock server from test-oci-fetch.c. The accept loop, ephemeral self-signed RSA-2048 + SAN cert generator, header parser, request log, and mock_send_full response helper all move out so both the fetch and the pull suites share one ~400 LOC implementation. Public symbols gain an oci_mock_ prefix to make the helper boundary explicit. Three small helpers (wipe_dir, scratch_root, base_url) tag along because both suites need them. test-oci-fetch.c shrinks by 380 lines, switches to the new header, and keeps its 15/15 passing. 
tests/test-oci-store.c covers 9 cases: layout creation, put + get round trip, miss returns ENOENT with out_digest=NULL, digest-only ref is rejected with EINVAL (its digest is the pin), malformed digest string is rejected with EINVAL, deep repository slashes get mkdir -p, pin overwrite replaces the file, blob and pin share the same root, and default_root respects XDG_DATA_HOME / falls back to HOME. tests/test-oci-pull.c covers 6 end-to-end cases against the mock. The test builds a synthetic image at runtime: three layer byte strings, one image config JSON referencing the layer digests, one manifest JSON referencing the config + layer digests, one index JSON referencing the manifest digest. All five digests are real SHA-256 of the actual bytes the mock serves, so the cross-check inside oci_pull exercises a real verification path. The cases are: tag resolves to index resolves to arm64 sub-manifest with config + 3 layers stored and pin written; tag resolves directly to manifest (no index) with pin written; digest-only ref pulls but no pin is written (and get_ref returns EINVAL); re-pull short-circuits layer + config downloads (second pull issues exactly 2 requests: index + sub-manifest); body / Docker-Content-Digest mismatch aborts with EPROTO and no pin written; index without linux/arm64 entry aborts with ENOENT. Makefile / mk/config.mk / mk/tests.mk wire the new translation units: oci/store.o and oci/pull.o join SRCS; test-oci-store.c and test-oci-pull.c land in NATIVE_TESTS so the cross-compile rule skips them; new link rules build test-oci-store and test-oci-pull; tests/lib/oci-mock.o is a separate object linked into both test-oci-fetch and test-oci-pull with OPENSSL_CFLAGS applied; make check gains two new stages running test-oci-store and test-oci-pull after the existing OCI suites.
make check stays fully green: 78 unit tests; busybox 81/0/3; proctitle low-stack; procfs-exec; timeout-disable; OCI-ref 34/34; OCI-digest 25/25; OCI-blob-store 14/14; OCI-manifest 76/76; OCI-fetch 15/15; OCI-store 9/9; OCI-pull 6/6. make test-oci-fetch-online (opt-in) still passes. --- Makefile | 25 +- mk/config.mk | 3 +- mk/tests.mk | 14 +- src/oci/cli.c | 216 +++++++++++- src/oci/pull.c | 346 ++++++++++++++++++ src/oci/pull.h | 58 ++++ src/oci/store.c | 360 +++++++++++++++++++ src/oci/store.h | 81 +++++ tests/lib/oci-mock.c | 394 +++++++++++++++++++++ tests/lib/oci-mock.h | 129 +++++++ tests/test-oci-fetch.c | 543 ++++------------------------- tests/test-oci-pull.c | 772 +++++++++++++++++++++++++++++++++++++++++ tests/test-oci-store.c | 486 ++++++++++++++++++++++++++ 13 files changed, 2947 insertions(+), 480 deletions(-) create mode 100644 src/oci/pull.c create mode 100644 src/oci/pull.h create mode 100644 src/oci/store.c create mode 100644 src/oci/store.h create mode 100644 tests/lib/oci-mock.c create mode 100644 tests/lib/oci-mock.h create mode 100644 tests/test-oci-pull.c create mode 100644 tests/test-oci-store.c diff --git a/Makefile b/Makefile index 2caee40..3b303df 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,9 @@ SRCS := \ oci/blob-store.c \ oci/media-type.c \ oci/manifest.c \ - oci/fetch.c + oci/fetch.c \ + oci/store.c \ + oci/pull.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) @@ -168,13 +170,32 @@ $(BUILD_DIR)/test-oci-manifest: $(BUILD_DIR)/test-oci-manifest.o $(BUILD_DIR)/oc @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ +## Build the shared OCI mock HTTPS server helper. tests/lib/oci-mock.{c,h} +## terminates TLS via libssl from brew openssl@3; both the fetch and pull +## suites link against the same compiled object to avoid duplicating ~400 LOC +## of scaffolding in their own translation units. 
+$(BUILD_DIR)/lib/oci-mock.o: CFLAGS += $(OPENSSL_CFLAGS) + ## Build the OCI fetch (libcurl) unit test (native macOS, no HVF). Pulls in ## blob-store + digest + manifest models + cJSON; links against system libcurl ## and the platform pthread runtime for the in-process mock HTTP server. The ## test mock terminates TLS using libssl from brew openssl@3 so the ca_file ## negative cases exercise a real certificate verification path. $(BUILD_DIR)/test-oci-fetch.o: CFLAGS += $(OPENSSL_CFLAGS) -$(BUILD_DIR)/test-oci-fetch: $(BUILD_DIR)/test-oci-fetch.o $(BUILD_DIR)/oci/fetch.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) +$(BUILD_DIR)/test-oci-fetch: $(BUILD_DIR)/test-oci-fetch.o $(BUILD_DIR)/lib/oci-mock.o $(BUILD_DIR)/oci/fetch.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread $(OPENSSL_LDFLAGS) + +## Build the OCI local store unit test (native macOS, no HVF). Pure C; links +## against the store wrapper plus its blob-store and digest dependencies. +$(BUILD_DIR)/test-oci-store: $(BUILD_DIR)/test-oci-store.o $(BUILD_DIR)/oci/store.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/ref.o | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + +## Build the OCI pull pipeline unit test (native macOS, no HVF). Shares the +## TLS-terminating mock server with test-oci-fetch via tests/lib/oci-mock. 
+$(BUILD_DIR)/test-oci-pull.o: CFLAGS += $(OPENSSL_CFLAGS) +$(BUILD_DIR)/test-oci-pull: $(BUILD_DIR)/test-oci-pull.o $(BUILD_DIR)/lib/oci-mock.o $(BUILD_DIR)/oci/pull.o $(BUILD_DIR)/oci/store.o $(BUILD_DIR)/oci/fetch.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread $(OPENSSL_LDFLAGS) diff --git a/mk/config.mk b/mk/config.mk index b42e8f7..02ee60f 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -17,7 +17,8 @@ endif # Exclude native macOS test files from cross-compilation NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c \ tests/test-oci-digest.c tests/test-oci-blob-store.c \ - tests/test-oci-manifest.c tests/test-oci-fetch.c + tests/test-oci-manifest.c tests/test-oci-fetch.c \ + tests/test-oci-store.c tests/test-oci-pull.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index ee0aad3..2a162cf 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -7,7 +7,7 @@ test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \ test-full test-multi-vcpu test-rwx \ test-oci-ref test-oci-digest test-oci-blob-store test-oci-manifest \ - test-oci-fetch test-oci-fetch-online \ + test-oci-fetch test-oci-fetch-online test-oci-store test-oci-pull \ test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ @@ -44,6 +44,10 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-oci-manifest @printf "\n$(BLUE)━━━ OCI fetch unit tests (offline mock HTTP) ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-oci-fetch + @printf "\n$(BLUE)━━━ OCI store unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-store + @printf "\n$(BLUE)━━━ OCI pull 
pipeline unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-pull ## Run the OCI image reference parser unit tests (native, no HVF) test-oci-ref: $(BUILD_DIR)/test-oci-ref @@ -72,6 +76,14 @@ test-oci-fetch: $(BUILD_DIR)/test-oci-fetch test-oci-fetch-online: $(BUILD_DIR)/test-oci-fetch @OCI_FETCH_ONLINE=1 $(BUILD_DIR)/test-oci-fetch +## Run the OCI local store unit tests (native, no HVF) +test-oci-store: $(BUILD_DIR)/test-oci-store + @$(BUILD_DIR)/test-oci-store + +## Run the OCI pull pipeline unit tests (native, no HVF, no network) +test-oci-pull: $(BUILD_DIR)/test-oci-pull + @$(BUILD_DIR)/test-oci-pull + test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ trap 'rm -rf "$$tmpdir"; rm -f /tmp/elfuse-sysroot-rename-dst.txt' EXIT; \ diff --git a/src/oci/cli.c b/src/oci/cli.c index 314917d..3b9fc59 100644 --- a/src/oci/cli.c +++ b/src/oci/cli.c @@ -3,19 +3,24 @@ * Copyright 2026 elfuse contributors * SPDX-License-Identifier: Apache-2.0 * - * Phase 1 only wires the inspect path through the reference parser. pull, - * prune, and list intentionally exit 2 with an explanatory message so early - * users get a stable surface to script against without touching code that - * does not yet exist. + * Slice 5a turns pull into a real subcommand: argument parsing for --store, + * -u USER[:PASS], --insecure-ca PEM, --insecure, -q, plus the actual oci_pull + * invocation against a freshly opened store and fetcher. inspect, prune, and + * list still rely on inspect's slice-1 canonical-ref print or return rc=2 + * "not implemented yet" (inspect's offline rendering lands in slice 5b). 
*/ #include "cli.h" +#include #include #include #include +#include "fetch.h" +#include "pull.h" #include "ref.h" +#include "store.h" static int print_usage(FILE *out) { @@ -23,10 +28,18 @@ static int print_usage(FILE *out) "usage: elfuse oci [args]\n" "\n" "Subcommands:\n" - " pull Download an image into the local store\n" - " inspect Show the canonical reference and parsed fields\n" - " prune Remove unreferenced blobs from the local store\n" - " list List images in the local store\n" + " pull [OPTIONS] Download an image into the local store\n" + " inspect Show the canonical reference and parsed fields\n" + " prune Remove unreferenced blobs from the local store\n" + " list List images in the local store\n" + "\n" + "Pull options:\n" + " --store DIR Override the local store root\n" + " (default: ~/Library/Application Support/elfuse/store)\n" + " -u, --user USER[:PASS] HTTP Basic auth for private registries\n" + " --insecure-ca PEM Trust PEM as the registry CA bundle\n" + " --insecure Skip TLS verify (loopback registries only)\n" + " -q, --quiet Suppress per-blob progress output\n" "\n" "Refs follow the docker/containerd grammar:\n" " alpine, alpine:3.20, user/repo, ghcr.io/owner/img:tag,\n" @@ -63,6 +76,191 @@ static int cmd_inspect(int argc, char **argv) return 0; } +/* Argument parser state for `oci pull`. Defaults are populated by the caller, + * then patched by parse_pull_args. + */ +typedef struct { + const char *store_root; /* heap-owned by main, not by parse */ + const char *user; + const char *password; + const char *ca_file; + bool allow_insecure; + bool quiet; + const char *ref_str; + char *user_pass_buf; /* heap; freed by caller */ +} pull_args_t; + +/* Split USER[:PASS] in-place. Returns 0 on success or -1 with errno=ENOMEM. 
*/ +static int split_userpass(const char *spec, pull_args_t *out) +{ + free(out->user_pass_buf); + out->user_pass_buf = strdup(spec); + if (!out->user_pass_buf) { + errno = ENOMEM; + return -1; + } + char *colon = strchr(out->user_pass_buf, ':'); + if (colon) { + *colon = '\0'; + out->user = out->user_pass_buf; + out->password = colon + 1; + } else { + out->user = out->user_pass_buf; + out->password = ""; + } + return 0; +} + +/* argv layout coming in: ["pull", "--flag", "...", ""]. argv[0] is the + * subcommand name; argv[argc-1] is the ref. Anything in between is options. + * Returns 0 on success, -1 on bad arguments (after printing an error). + */ +static int parse_pull_args(int argc, char **argv, pull_args_t *out) +{ + int i = 1; + while (i < argc) { + const char *a = argv[i]; + if (a[0] != '-') + break; + if (!strcmp(a, "--")) { + i++; + break; + } + if (!strcmp(a, "-h") || !strcmp(a, "--help")) { + return 1; + } else if (!strcmp(a, "-q") || !strcmp(a, "--quiet")) { + out->quiet = true; + } else if (!strcmp(a, "--insecure")) { + out->allow_insecure = true; + } else if (!strcmp(a, "--store")) { + if (++i >= argc) { + fputs("error: --store needs an argument\n", stderr); + return -1; + } + out->store_root = argv[i]; + } else if (!strcmp(a, "-u") || !strcmp(a, "--user")) { + if (++i >= argc) { + fputs("error: -u needs USER[:PASS]\n", stderr); + return -1; + } + if (split_userpass(argv[i], out) < 0) { + fputs("error: out of memory parsing credentials\n", stderr); + return -1; + } + } else if (!strcmp(a, "--insecure-ca")) { + if (++i >= argc) { + fputs("error: --insecure-ca needs a PEM path\n", stderr); + return -1; + } + out->ca_file = argv[i]; + } else { + fprintf(stderr, "error: unknown pull option: %s\n", a); + return -1; + } + i++; + } + if (i >= argc) { + fputs("error: pull needs a reference argument\n", stderr); + return -1; + } + if (i != argc - 1) { + fputs("error: extra arguments after pull reference\n", stderr); + return -1; + } + out->ref_str = argv[i]; 
+ return 0; +} + +static int cmd_pull(int argc, char **argv) +{ + pull_args_t args = {0}; + int prc = parse_pull_args(argc, argv, &args); + if (prc == 1) { + free(args.user_pass_buf); + return print_usage(stdout); + } + if (prc < 0) { + free(args.user_pass_buf); + return 2; + } + + /* Default store root: either --store override or the platform default. */ + char *default_root = NULL; + const char *store_root = args.store_root; + if (!store_root) { + default_root = oci_store_default_root(); + if (!default_root) { + fprintf(stderr, + "error: could not determine default store root " + "(HOME not set?)\n"); + free(args.user_pass_buf); + return 1; + } + store_root = default_root; + } + + oci_ref_t ref = {0}; + const char *err = NULL; + if (oci_ref_parse(args.ref_str, &ref, &err) < 0) { + fprintf(stderr, "error: invalid reference: %s\n", + err ? err : "(unknown)"); + free(default_root); + free(args.user_pass_buf); + return 1; + } + + oci_store_t *store = oci_store_open(store_root); + if (!store) { + fprintf(stderr, "error: could not open store at %s: %s\n", store_root, + strerror(errno)); + oci_ref_free(&ref); + free(default_root); + free(args.user_pass_buf); + return 1; + } + + oci_fetcher_options_t fopts = { + .username = args.user, + .password = args.password, + .ca_file = args.ca_file, + .allow_insecure = args.allow_insecure, + }; + oci_fetcher_t *fetcher = oci_fetcher_new(&fopts); + if (!fetcher) { + fprintf(stderr, "error: could not create fetcher: %s\n", + strerror(errno)); + oci_store_close(store); + oci_ref_free(&ref); + free(default_root); + free(args.user_pass_buf); + return 1; + } + + if (!args.quiet) { + char *canon = oci_ref_canonical(&ref); + fprintf(stderr, "elfuse oci pull %s\n store: %s\n", + canon ? canon : args.ref_str, store_root); + free(canon); + } + + oci_pull_options_t popts = {.quiet = args.quiet}; + err = NULL; + int rc = oci_pull(fetcher, store, &ref, &popts, &err); + if (rc < 0) { + fprintf(stderr, "error: pull failed: %s\n", + err ? 
err : strerror(errno)); + } else if (!args.quiet) { + fputs("done.\n", stderr); + } + + oci_fetcher_free(fetcher); + oci_store_close(store); + oci_ref_free(&ref); + free(default_root); + free(args.user_pass_buf); + return rc < 0 ? 1 : 0; +} + static int cmd_not_implemented(const char *name) { fprintf(stderr, @@ -82,7 +280,7 @@ int oci_cli_main(int argc, char **argv) if (!strcmp(sub, "inspect")) return cmd_inspect(argc - 1, argv + 1); if (!strcmp(sub, "pull")) - return cmd_not_implemented("pull"); + return cmd_pull(argc - 1, argv + 1); if (!strcmp(sub, "prune")) return cmd_not_implemented("prune"); if (!strcmp(sub, "list") || !strcmp(sub, "ls")) diff --git a/src/oci/pull.c b/src/oci/pull.c new file mode 100644 index 0000000..375b32f --- /dev/null +++ b/src/oci/pull.c @@ -0,0 +1,346 @@ +/* elfuse oci pull pipeline + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The pull function is intentionally linear: every state transition (top-level + * fetch, index recurse, config fetch, layer fetch, pin write) flows top-to- + * bottom in oci_pull below. Helpers exist only to remove pure boilerplate + * (response cleanup, hex equality, progress prints), so that a reader of + * oci_pull can follow the registry round trips without chasing through + * indirection. + */ + +#include "pull.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "blob-store.h" +#include "digest.h" +#include "manifest.h" +#include "media-type.h" + +static const char *const PULL_ACCEPT[] = { + "application/vnd.oci.image.index.v1+json", + "application/vnd.docker.distribution.manifest.list.v2+json", + "application/vnd.oci.image.manifest.v1+json", + "application/vnd.docker.distribution.manifest.v2+json", + NULL, +}; + +static FILE *pick_progress(const oci_pull_options_t *opts) +{ + if (!opts) + return stderr; + if (opts->quiet) + return NULL; + return opts->progress ? 
opts->progress : stderr; +} + +static void progress_line(FILE *fp, const char *kind, const char *digest_str, + int64_t size, const char *state, + const char *media_type) +{ + if (!fp) + return; + /* Truncated digest keeps the line readable; full hex still goes into the + * pin file and the blob store for verification. + */ + char short_digest[24]; + snprintf(short_digest, sizeof(short_digest), "%.19s...", digest_str); + fprintf(fp, " %-9s %-22s %12lldB %-11s %s\n", kind, short_digest, + (long long) size, state ? state : "", + media_type ? media_type : ""); + fflush(fp); +} + +/* Case-insensitive prefix check for "sha256:" / "sha512:". */ +static bool digest_str_matches(const char *want, const char *got) +{ + if (!want || !got) + return false; + return strcasecmp(want, got) == 0; +} + +/* Cross-check the manifest body against the registry-supplied + * Docker-Content-Digest header. Servers usually emit one; when they do not, + * trust the body's local SHA-256. The local hex is what we use to address the + * blob in the store regardless, so a missing header degrades to local-only + * verification but not to silent corruption. 
+ */ +static int verify_manifest_digest(const oci_fetch_response_t *resp, + const char *expected_digest_str, + char *out_digest_str, size_t out_cap, + const char **err_msg) +{ + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (oci_digest_bytes(OCI_DIGEST_SHA256, resp->body, resp->body_len, hex) == + 0) { + if (err_msg) + *err_msg = "failed to hash manifest body"; + errno = EIO; + return -1; + } + int n = snprintf(out_digest_str, out_cap, "sha256:%s", hex); + if (n < 0 || (size_t) n >= out_cap) { + if (err_msg) + *err_msg = "manifest digest buffer too small"; + errno = ENAMETOOLONG; + return -1; + } + if (resp->docker_content_digest && + !digest_str_matches(resp->docker_content_digest, out_digest_str)) { + if (err_msg) + *err_msg = "manifest body digest does not match " + "Docker-Content-Digest header"; + errno = EPROTO; + return -1; + } + if (expected_digest_str && + !digest_str_matches(expected_digest_str, out_digest_str)) { + if (err_msg) + *err_msg = "manifest body digest does not match expected digest"; + errno = EPROTO; + return -1; + } + return 0; +} + +/* Fetch a manifest document (image index, image manifest, or sub-manifest) by + * selector, hash its body, cross-check against expected_digest_str (when + * non-NULL), and write it into the local blob store. Returns 0 on success and + * fills *out_digest_str with the canonical "sha256:" representation. The + * caller frees *out_response via oci_fetch_response_free. 
+ */ +static int fetch_and_persist_manifest(oci_fetcher_t *f, + oci_store_t *store, + const oci_ref_t *ref, + const char *selector, + const char *expected_digest_str, + oci_fetch_response_t *out_resp, + char *out_digest_str, size_t out_cap, + const char **err_msg) +{ + memset(out_resp, 0, sizeof(*out_resp)); + if (oci_fetch_manifest(f, ref, selector, PULL_ACCEPT, out_resp, err_msg) < + 0) { + return -1; + } + if (out_resp->body_len == 0 || !out_resp->body) { + if (err_msg) + *err_msg = "manifest response had an empty body"; + errno = EPROTO; + return -1; + } + if (verify_manifest_digest(out_resp, expected_digest_str, out_digest_str, + out_cap, err_msg) < 0) { + return -1; + } + char hex[OCI_DIGEST_HEX_MAX + 1]; + oci_digest_algo_t algo; + if (!oci_digest_parse(out_digest_str, &algo, hex)) { + if (err_msg) + *err_msg = "computed manifest digest is malformed"; + errno = EINVAL; + return -1; + } + if (oci_blob_store_put_bytes(oci_store_blobs(store), OCI_DIGEST_SHA256, hex, + out_resp->body, out_resp->body_len) < 0) { + if (err_msg) + *err_msg = "failed to persist manifest body to local store"; + return -1; + } + return 0; +} + +static int parse_top_level(const oci_fetch_response_t *resp, + oci_media_type_t *out_mt, + const char **err_msg) +{ + oci_media_type_t mt = oci_media_type_parse(resp->content_type); + if (mt == OCI_MT_UNKNOWN) { + if (err_msg) + *err_msg = "registry returned an unrecognized Content-Type"; + errno = EPROTO; + return -1; + } + if (!oci_media_type_is_index(mt) && !oci_media_type_is_manifest(mt)) { + if (err_msg) + *err_msg = "registry returned a non-manifest Content-Type"; + errno = EPROTO; + return -1; + } + *out_mt = mt; + return 0; +} + +int oci_pull(oci_fetcher_t *fetcher, + oci_store_t *store, + const oci_ref_t *ref, + const oci_pull_options_t *opts, + const char **err_msg) +{ + if (!fetcher || !store || !ref) { + if (err_msg) + *err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + + FILE *progress = pick_progress(opts); + int 
rc = -1; + oci_fetch_response_t top_resp = {0}; + oci_fetch_response_t sub_resp = {0}; + oci_index_t idx_doc = {0}; + oci_manifest_t manifest = {0}; + bool have_sub = false; + char top_digest_str[OCI_DIGEST_HEX_MAX + 16]; + char sub_digest_str[OCI_DIGEST_HEX_MAX + 16]; + top_digest_str[0] = '\0'; + sub_digest_str[0] = '\0'; + + /* 1. Top-level fetch. Selector defaults to ref->digest, falling through + * to ref->tag, inside oci_fetch_manifest. When the user pulled by digest, + * expected_digest_str is the locked target; pulls by tag accept whatever + * the server resolves the tag to. + */ + if (fetch_and_persist_manifest(fetcher, store, ref, NULL, ref->digest, + &top_resp, top_digest_str, + sizeof(top_digest_str), err_msg) < 0) { + goto out; + } + oci_media_type_t top_mt = OCI_MT_UNKNOWN; + if (parse_top_level(&top_resp, &top_mt, err_msg) < 0) + goto out; + + progress_line(progress, "manifest", top_digest_str, + (int64_t) top_resp.body_len, "downloaded", + oci_media_type_name(top_mt)); + + const char *manifest_body = top_resp.body; + size_t manifest_body_len = top_resp.body_len; + const char *pin_digest_str = top_digest_str; + + /* 2. If top-level was an image index, pick linux/arm64 and refetch. */ + if (oci_media_type_is_index(top_mt)) { + if (oci_index_parse(top_resp.body, top_resp.body_len, &idx_doc, + err_msg) < 0) { + goto out; + } + const oci_index_entry_t *entry = oci_index_pick_linux_arm64(&idx_doc); + if (!entry) { + if (err_msg) + *err_msg = "image index has no linux/arm64 entry"; + errno = ENOENT; + goto out; + } + if (progress) { + fprintf(progress, " picked %-22s %12lldB linux/arm64%s%s\n", + entry->desc.digest_str, (long long) entry->desc.size, + entry->platform.variant && *entry->platform.variant + ? " " + : "", + entry->platform.variant ? 
entry->platform.variant : ""); + fflush(progress); + } + + if (fetch_and_persist_manifest(fetcher, store, ref, + entry->desc.digest_str, + entry->desc.digest_str, &sub_resp, + sub_digest_str, sizeof(sub_digest_str), + err_msg) < 0) { + goto out; + } + have_sub = true; + oci_media_type_t sub_mt = OCI_MT_UNKNOWN; + if (parse_top_level(&sub_resp, &sub_mt, err_msg) < 0) + goto out; + if (!oci_media_type_is_manifest(sub_mt)) { + if (err_msg) + *err_msg = "index entry resolved to a non-manifest document"; + errno = EPROTO; + goto out; + } + progress_line(progress, "manifest", sub_digest_str, + (int64_t) sub_resp.body_len, "downloaded", + oci_media_type_name(sub_mt)); + + manifest_body = sub_resp.body; + manifest_body_len = sub_resp.body_len; + /* pin_digest_str stays as top_digest_str: the user pulled the tag, + * the registry resolved that tag to the index, so the pin records the + * index digest. Future inspect re-walks index -> manifest. + */ + } + + /* 3. Parse the manifest body. */ + if (oci_manifest_parse(manifest_body, manifest_body_len, &manifest, + err_msg) < 0) { + goto out; + } + + /* 4. Fetch config blob. */ + bool config_cached = oci_blob_store_has(oci_store_blobs(store), + manifest.config.algo, + manifest.config.hex); + if (oci_fetch_blob(fetcher, ref, &manifest.config, oci_store_blobs(store), + err_msg) < 0) { + goto out; + } + progress_line(progress, "config", manifest.config.digest_str, + manifest.config.size, + config_cached ? "cached" : "downloaded", + oci_media_type_name(manifest.config.media_type)); + + /* 5. Fetch each layer blob in manifest order. */ + for (size_t i = 0; i < manifest.nlayers; i++) { + const oci_descriptor_t *layer = &manifest.layers[i]; + bool cached = oci_blob_store_has(oci_store_blobs(store), layer->algo, + layer->hex); + if (oci_fetch_blob(fetcher, ref, layer, oci_store_blobs(store), + err_msg) < 0) { + goto out; + } + progress_line(progress, "layer", layer->digest_str, layer->size, + cached ? 
"cached" : "downloaded", + oci_media_type_name(layer->media_type)); + } + + /* 6. Pin tag -> top-level digest. Digest-only refs are self-pinning and + * skip this step (oci_store_put_ref refuses them). + */ + if (ref->tag) { + if (oci_store_put_ref(store, ref, pin_digest_str, err_msg) < 0) + goto out; + if (progress) { + fprintf(progress, " pin %s:%s -> %s\n", ref->repository, + ref->tag, pin_digest_str); + fflush(progress); + } + } + + rc = 0; + +out: + /* Preserve the caller-visible errno across cleanup. free / fclose can + * stomp on errno even when they succeed, which would defeat callers that + * key tests off specific values (EPROTO / ENOENT / EINVAL). + */ + { + int saved_errno = errno; + oci_manifest_free(&manifest); + oci_index_free(&idx_doc); + if (have_sub) + oci_fetch_response_free(&sub_resp); + oci_fetch_response_free(&top_resp); + if (rc != 0) + errno = saved_errno; + } + return rc; +} diff --git a/src/oci/pull.h b/src/oci/pull.h new file mode 100644 index 0000000..a10ffb8 --- /dev/null +++ b/src/oci/pull.h @@ -0,0 +1,58 @@ +/* elfuse oci pull pipeline + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Glues the slice 4a/4b fetcher and the slice 3 manifest parser to the + * slice 5a local store. One call to oci_pull resolves an image reference into + * a fully populated blob graph on disk: + * + * 1. Fetch the top-level descriptor by ref->digest or ref->tag. + * 2. Cross-check the response Docker-Content-Digest against a local SHA-256 + * of the body; a mismatch is a hostile-registry signal and aborts. + * 3. If the response is an image index, parse it, pick the linux/arm64 + * sub-manifest (oci-roadmap Q3), and re-fetch by that digest. + * 4. Parse the manifest, fetch the config blob, fetch each layer blob. + * 5. Write the tag-to-manifest-digest pin so the next pull or inspect for + * the same tag is reproducible. 
+ * + * The function is best-effort idempotent: a re-pull of the same reference + * short-circuits all already-present blobs through the slice 4a oci_fetch_blob + * cache check, only the top-level manifest is re-fetched (small bytes; future + * slice can add a manifest cache). + * + * Foreign / nondistributable layers and schema v1 manifests are rejected by + * the parsers in slice 3; oci_pull surfaces the diagnostic and aborts before + * any partial layer hits the store. + */ + +#pragma once + +#include + +#include "fetch.h" +#include "ref.h" +#include "store.h" + +typedef struct { + /* Per-blob progress is written here as one line per descriptor. Set to + * NULL to suppress all output. Defaults to stderr when opts is NULL or + * progress is NULL but suppress_progress is not requested explicitly. + */ + FILE *progress; + /* When true, suppress progress output even if progress is NULL (the + * NULL/default interpretation lands on stderr). Used by elfuse oci + * pull -q. + */ + bool quiet; +} oci_pull_options_t; + +/* Run the pull pipeline. Returns 0 on success, -1 on failure with errno + * preserved and *err_msg (when non-NULL) pointing at a static description. + * The store and fetcher must outlive the call; both are reused across phases. + */ +int oci_pull(oci_fetcher_t *fetcher, + oci_store_t *store, + const oci_ref_t *ref, + const oci_pull_options_t *opts, + const char **err_msg); diff --git a/src/oci/store.c b/src/oci/store.c new file mode 100644 index 0000000..56f8e21 --- /dev/null +++ b/src/oci/store.c @@ -0,0 +1,360 @@ +/* Local OCI image store: blobs + tag-to-digest pinning + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The pin write path uses rename(2) rather than link(2) because tag pins are + * mutable: pulling alpine:3.20 today may resolve to a different digest than + * yesterday, and overwriting the pin is the correct semantic. 
The blob store + * underneath this layer keeps its link(2) discipline because content-addressed + * blobs are immutable. + */ + +#include "store.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "digest.h" + +/* Largest path the store materializes. Comfortably above PATH_MAX so snprintf + * truncation surfaces as ENAMETOOLONG instead of a silent corruption. + */ +#define STORE_PATH_MAX 4096 + +struct oci_store { + char *root; + oci_blob_store_t *blobs; +}; + +static int mkdir_one(const char *path) +{ + if (mkdir(path, 0755) == 0) + return 0; + if (errno == EEXIST) { + struct stat st; + if (stat(path, &st) == 0 && S_ISDIR(st.st_mode)) + return 0; + errno = ENOTDIR; + return -1; + } + return -1; +} + +/* Create every directory along path. Walks component by component so a missing + * intermediate directory does not abort the open. Same shape as the helper in + * blob-store.c; kept independent here to avoid leaking blob-store internals. + */ +static int mkdir_p(const char *path) +{ + char buf[STORE_PATH_MAX]; + size_t len = strlen(path); + if (len == 0 || len >= sizeof(buf)) { + errno = ENAMETOOLONG; + return -1; + } + memcpy(buf, path, len + 1); + + for (size_t i = 1; i < len; i++) { + if (buf[i] != '/') + continue; + buf[i] = '\0'; + if (mkdir_one(buf) < 0) + return -1; + buf[i] = '/'; + } + return mkdir_one(buf); +} + +oci_store_t *oci_store_open(const char *root) +{ + if (!root || !*root) { + errno = EINVAL; + return NULL; + } + oci_blob_store_t *blobs = oci_blob_store_open(root); + if (!blobs) + return NULL; + + char refs[STORE_PATH_MAX]; + int n = snprintf(refs, sizeof(refs), "%s/refs", root); + if (n < 0 || (size_t) n >= sizeof(refs)) { + oci_blob_store_close(blobs); + errno = ENAMETOOLONG; + return NULL; + } + if (mkdir_one(refs) < 0) { + oci_blob_store_close(blobs); + return NULL; + } + + oci_store_t *s = calloc(1, sizeof(*s)); + if (!s) { + oci_blob_store_close(blobs); + errno = ENOMEM; + return NULL; + } + 
s->root = strdup(root); + if (!s->root) { + free(s); + oci_blob_store_close(blobs); + errno = ENOMEM; + return NULL; + } + s->blobs = blobs; + return s; +} + +void oci_store_close(oci_store_t *s) +{ + if (!s) + return; + oci_blob_store_close(s->blobs); + free(s->root); + free(s); +} + +const char *oci_store_root(const oci_store_t *s) +{ + return s ? s->root : NULL; +} + +oci_blob_store_t *oci_store_blobs(oci_store_t *s) +{ + return s ? s->blobs : NULL; +} + +char *oci_store_default_root(void) +{ + const char *xdg = getenv("XDG_DATA_HOME"); + if (xdg && *xdg) { + size_t n = strlen(xdg) + sizeof("/elfuse/store"); + char *r = malloc(n); + if (!r) { + errno = ENOMEM; + return NULL; + } + snprintf(r, n, "%s/elfuse/store", xdg); + return r; + } + const char *home = getenv("HOME"); + if (!home || !*home) { + errno = ENOENT; + return NULL; + } + static const char SUFFIX[] = "/Library/Application Support/elfuse/store"; + size_t n = strlen(home) + sizeof(SUFFIX); + char *r = malloc(n); + if (!r) { + errno = ENOMEM; + return NULL; + } + snprintf(r, n, "%s%s", home, SUFFIX); + return r; +} + +static int build_ref_dir(const oci_store_t *s, const oci_ref_t *ref, + char *out, size_t cap) +{ + int n = snprintf(out, cap, "%s/refs/%s/%s", s->root, ref->registry, + ref->repository); + if (n < 0 || (size_t) n >= cap) { + errno = ENAMETOOLONG; + return -1; + } + return 0; +} + +static int build_ref_path(const oci_store_t *s, const oci_ref_t *ref, + char *out, size_t cap) +{ + int n = snprintf(out, cap, "%s/refs/%s/%s/%s", s->root, ref->registry, + ref->repository, ref->tag); + if (n < 0 || (size_t) n >= cap) { + errno = ENAMETOOLONG; + return -1; + } + return 0; +} + +static unsigned long pin_seq(void) +{ + static unsigned long n = 0; + return __sync_add_and_fetch(&n, 1); +} + +int oci_store_put_ref(oci_store_t *s, + const oci_ref_t *ref, + const char *digest_str, + const char **err_msg) +{ + if (!s || !ref || !digest_str || !ref->registry || !ref->repository) { + if (err_msg) + 
*err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + if (!ref->tag) { + if (err_msg) + *err_msg = "ref has no tag; digest-only refs are self-pinning"; + errno = EINVAL; + return -1; + } + + /* Validate digest shape so a corrupt caller cannot poison the pin file + * with arbitrary bytes that later defeat oci_store_get_ref. + */ + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse(digest_str, &algo, hex)) { + if (err_msg) + *err_msg = "digest must be lowercase :"; + errno = EINVAL; + return -1; + } + + char dir[STORE_PATH_MAX]; + if (build_ref_dir(s, ref, dir, sizeof(dir)) < 0) { + if (err_msg) + *err_msg = "pin directory path exceeds STORE_PATH_MAX"; + return -1; + } + if (mkdir_p(dir) < 0) { + if (err_msg) + *err_msg = "failed to create pin directory"; + return -1; + } + char path[STORE_PATH_MAX]; + if (build_ref_path(s, ref, path, sizeof(path)) < 0) { + if (err_msg) + *err_msg = "pin file path exceeds STORE_PATH_MAX"; + return -1; + } + + char tmp[STORE_PATH_MAX]; + int n = snprintf(tmp, sizeof(tmp), "%s.tmp-%d-%lu", path, (int) getpid(), + pin_seq()); + if (n < 0 || (size_t) n >= sizeof(tmp)) { + if (err_msg) + *err_msg = "pin tmp path exceeds STORE_PATH_MAX"; + errno = ENAMETOOLONG; + return -1; + } + + int fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) { + if (err_msg) + *err_msg = "failed to create pin tmp file"; + return -1; + } + size_t dlen = strlen(digest_str); + const char nl = '\n'; + if (write(fd, digest_str, dlen) != (ssize_t) dlen || + write(fd, &nl, 1) != 1) { + int saved = errno; + close(fd); + unlink(tmp); + errno = saved; + if (err_msg) + *err_msg = "failed to write pin tmp file"; + return -1; + } + if (fsync(fd) < 0) { + int saved = errno; + close(fd); + unlink(tmp); + errno = saved; + if (err_msg) + *err_msg = "fsync on pin tmp file failed"; + return -1; + } + if (close(fd) < 0) { + int saved = errno; + unlink(tmp); + errno = saved; + if (err_msg) + *err_msg = "close on pin tmp 
file failed"; + return -1; + } + if (rename(tmp, path) < 0) { + int saved = errno; + unlink(tmp); + errno = saved; + if (err_msg) + *err_msg = "rename of pin tmp file failed"; + return -1; + } + return 0; +} + +int oci_store_get_ref(oci_store_t *s, + const oci_ref_t *ref, + char **out_digest, + const char **err_msg) +{ + if (!s || !ref || !out_digest || !ref->registry || !ref->repository) { + if (err_msg) + *err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + *out_digest = NULL; + if (!ref->tag) { + if (err_msg) + *err_msg = "ref has no tag"; + errno = EINVAL; + return -1; + } + + char path[STORE_PATH_MAX]; + if (build_ref_path(s, ref, path, sizeof(path)) < 0) { + if (err_msg) + *err_msg = "pin file path exceeds STORE_PATH_MAX"; + return -1; + } + FILE *fp = fopen(path, "r"); + if (!fp) { + if (err_msg) + *err_msg = errno == ENOENT ? "ref not pinned in local store" + : "failed to open pin file"; + return -1; + } + char buf[OCI_DIGEST_HEX_MAX + 16]; + if (!fgets(buf, sizeof(buf), fp)) { + int saved = ferror(fp) ? 
errno : EINVAL; + fclose(fp); + errno = saved; + if (err_msg) + *err_msg = "pin file is empty or unreadable"; + return -1; + } + fclose(fp); + + size_t blen = strlen(buf); + while (blen > 0 && (buf[blen - 1] == '\n' || buf[blen - 1] == '\r')) + buf[--blen] = '\0'; + + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse(buf, &algo, hex)) { + if (err_msg) + *err_msg = "pin file does not contain a valid digest"; + errno = EINVAL; + return -1; + } + char *copy = strdup(buf); + if (!copy) { + if (err_msg) + *err_msg = "out of memory"; + errno = ENOMEM; + return -1; + } + *out_digest = copy; + return 0; +} diff --git a/src/oci/store.h b/src/oci/store.h new file mode 100644 index 0000000..28b292b --- /dev/null +++ b/src/oci/store.h @@ -0,0 +1,81 @@ +/* Local OCI image store: blobs + tag-to-digest pinning + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Wraps the slice-2 content-addressable blob store with a tag-to-digest pin + * table so that elfuse oci pull / inspect can reproduce a pull by name. The + * on-disk layout under is: + * + * blobs// finalized blob (immutable) + * tmp/blob---XXXXXX in-flight staging + * refs/// pin file (one line: ":") + * + * The pin file contains the manifest digest captured at pull time so a + * subsequent pull by tag can short-circuit when the blob is already present, + * and elfuse oci inspect can render the manifest offline. + * + * Phase 1 keeps as a plain directory. The sparse case-sensitive APFS + * volume bootstrap (oci-roadmap Q1) is a Phase 2 concern; the volume mount + * point will sit at the same default path so this API does not change. + */ + +#pragma once + +#include "blob-store.h" +#include "ref.h" + +typedef struct oci_store oci_store_t; + +/* Open or create the store rooted at `root`. Ensures blobs//, tmp/, and + * refs/ exist. Returns NULL on failure with errno preserved. 
+ */ +oci_store_t *oci_store_open(const char *root); + +/* Close the store handle. Does not delete on-disk state. Safe on NULL. */ +void oci_store_close(oci_store_t *s); + +/* Return the store root path. The returned pointer is owned by the store and + * is valid until oci_store_close. + */ +const char *oci_store_root(const oci_store_t *s); + +/* Return the underlying blob store handle. The returned pointer is owned by + * the store; do not close it directly. + */ +oci_blob_store_t *oci_store_blobs(oci_store_t *s); + +/* Return the default store root for the current user. macOS XDG-ish: + * $XDG_DATA_HOME/elfuse/store when XDG_DATA_HOME is set and non-empty + * $HOME/Library/Application Support/elfuse/store otherwise + * Returns a heap-allocated string the caller must free, or NULL on env miss + * (errno=ENOENT) or oom (errno=ENOMEM). + */ +char *oci_store_default_root(void); + +/* Write a tag-to-digest pin for ref. ref->tag must be set; refs without a tag + * are self-pinning by their digest field and putting a pin for them is an + * EINVAL. digest_str is the canonical "<algo>:<hex>" form of the manifest + * digest captured at pull time. Atomically replaces any existing pin via + * write-to-temp + rename. Creates the refs/<registry>/<repository>/ prefix + * directories on demand. + * + * Returns 0 on success, -1 with errno preserved and *err_msg (when non-NULL) + * pointing at a static description on failure. + */ +int oci_store_put_ref(oci_store_t *s, + const oci_ref_t *ref, + const char *digest_str, + const char **err_msg); + +/* Read the pinned manifest digest for ref. ref->tag must be set; digest-only + * refs are self-pinning and trigger EINVAL. On hit returns 0 and writes a + * heap-allocated "<algo>:<hex>" string into *out_digest (caller frees). On + * miss returns -1 with errno=ENOENT and *out_digest=NULL. Other IO errors + * return -1 with errno preserved. *err_msg (when non-NULL) is populated on + * any non-success path. 
+ */ +int oci_store_get_ref(oci_store_t *s, + const oci_ref_t *ref, + char **out_digest, + const char **err_msg); diff --git a/tests/lib/oci-mock.c b/tests/lib/oci-mock.c new file mode 100644 index 0000000..9efa444 --- /dev/null +++ b/tests/lib/oci-mock.c @@ -0,0 +1,394 @@ +/* Shared TLS-terminated HTTP mock server for OCI test suites + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Moved here from tests/test-oci-fetch.c so both the fetch and the pull test + * suites can drive the same listener without duplicating the OpenSSL + socket + * scaffolding. Behaviour is unchanged; only the symbol names gained an + * oci_mock_ prefix and a few helpers (scratch_root, base_url, wipe_dir) moved + * along to keep their callers terse. + */ + +#include "oci-mock.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +ssize_t oci_mock_io_read(oci_mock_io_t *io, void *buf, size_t cap) +{ + int n = SSL_read(io->ssl, buf, (int) cap); + return n > 0 ? 
(ssize_t) n : -1; +} + +void oci_mock_io_write(oci_mock_io_t *io, const void *buf, size_t n) +{ + const char *p = buf; + size_t left = n; + while (left) { + int w = SSL_write(io->ssl, p, (int) left); + if (w <= 0) + return; + p += w; + left -= (size_t) w; + } +} + +static ssize_t read_request_until_empty(oci_mock_io_t *io, char *buf, size_t cap) +{ + size_t off = 0; + while (off + 1 < cap) { + ssize_t n = oci_mock_io_read(io, buf + off, cap - 1 - off); + if (n <= 0) + break; + off += (size_t) n; + buf[off] = '\0'; + if (strstr(buf, "\r\n\r\n")) + break; + } + return (ssize_t) off; +} + +static void parse_request(const char *raw, oci_mock_request_t *out) +{ + memset(out, 0, sizeof(*out)); + const char *sp1 = strchr(raw, ' '); + if (!sp1) + return; + size_t mlen = (size_t) (sp1 - raw); + if (mlen >= sizeof(out->method)) + mlen = sizeof(out->method) - 1; + memcpy(out->method, raw, mlen); + const char *sp2 = strchr(sp1 + 1, ' '); + if (!sp2) + return; + size_t plen = (size_t) (sp2 - sp1 - 1); + if (plen >= sizeof(out->path)) + plen = sizeof(out->path) - 1; + memcpy(out->path, sp1 + 1, plen); + + const char *line = strstr(raw, "\r\n"); + if (!line) + return; + line += 2; + while (*line && strncmp(line, "\r\n", 2) != 0) { + const char *eol = strstr(line, "\r\n"); + if (!eol) + break; + size_t llen = (size_t) (eol - line); + if (llen > 13 && !strncasecmp(line, "Authorization:", 14)) { + const char *v = line + 14; + while (*v == ' ') + v++; + size_t vlen = (size_t) (eol - v); + if (vlen >= sizeof(out->authorization)) + vlen = sizeof(out->authorization) - 1; + memcpy(out->authorization, v, vlen); + out->authorization[vlen] = '\0'; + } else if (llen > 6 && !strncasecmp(line, "Accept:", 7)) { + const char *v = line + 7; + while (*v == ' ') + v++; + size_t vlen = (size_t) (eol - v); + if (vlen >= sizeof(out->accept)) + vlen = sizeof(out->accept) - 1; + memcpy(out->accept, v, vlen); + out->accept[vlen] = '\0'; + } + line = eol + 2; + } +} + +static void *mock_server_loop(void 
*arg) +{ + oci_mock_server_t *s = arg; + while (1) { + pthread_mutex_lock(&s->lock); + bool stop = s->stop; + pthread_mutex_unlock(&s->lock); + if (stop) + break; + int cfd = accept(s->listen_fd, NULL, NULL); + if (cfd < 0) { + if (errno == EINTR) + continue; + break; + } + SSL *ssl = SSL_new(s->ssl_ctx); + if (!ssl) { + close(cfd); + continue; + } + SSL_set_fd(ssl, cfd); + if (SSL_accept(ssl) <= 0) { + /* Negative-trust tests deliberately abort the handshake; just + * recycle the socket and let the request log stay empty so the + * caller can assert n_requests == 0. + */ + SSL_free(ssl); + close(cfd); + continue; + } + oci_mock_io_t io = {.ssl = ssl}; + char buf[8192]; + ssize_t got = read_request_until_empty(&io, buf, sizeof(buf)); + if (got <= 0) { + SSL_shutdown(ssl); + SSL_free(ssl); + close(cfd); + continue; + } + oci_mock_request_t req; + parse_request(buf, &req); + + pthread_mutex_lock(&s->lock); + if (s->n_requests < OCI_MOCK_LOG_MAX) { + s->log[s->n_requests++] = req; + } + oci_mock_handler_t h = s->handler; + pthread_mutex_unlock(&s->lock); + + if (h) + h(s, &io, &req); + SSL_shutdown(ssl); + SSL_free(ssl); + close(cfd); + } + return NULL; +} + +/* Generate an in-memory RSA keypair + self-signed cert valid for one day, + * covering CN=127.0.0.1 plus SAN IP:127.0.0.1 and DNS:localhost. Writes the + * certificate (PEM) to s->ca_pem_path for the fetcher to consume as + * opts.ca_file. 
+ */ +static int mock_make_cert(oci_mock_server_t *s, const char *scratch_root) +{ + EVP_PKEY *pkey = EVP_RSA_gen(2048); + if (!pkey) + return -1; + X509 *cert = X509_new(); + if (!cert) { + EVP_PKEY_free(pkey); + return -1; + } + X509_set_version(cert, 2); + ASN1_INTEGER_set(X509_get_serialNumber(cert), 1); + X509_gmtime_adj(X509_get_notBefore(cert), 0); + X509_gmtime_adj(X509_get_notAfter(cert), 60 * 60 * 24); + X509_set_pubkey(cert, pkey); + X509_NAME *name = X509_get_subject_name(cert); + X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, + (const unsigned char *) "127.0.0.1", -1, -1, 0); + X509_set_issuer_name(cert, name); + + X509V3_CTX vctx; + X509V3_set_ctx_nodb(&vctx); + X509V3_set_ctx(&vctx, cert, cert, NULL, NULL, 0); + X509_EXTENSION *ext = X509V3_EXT_conf_nid(NULL, &vctx, + NID_subject_alt_name, + "IP:127.0.0.1, DNS:localhost"); + if (ext) { + X509_add_ext(cert, ext, -1); + X509_EXTENSION_free(ext); + } + if (!X509_sign(cert, pkey, EVP_sha256())) { + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + + snprintf(s->ca_pem_path, sizeof(s->ca_pem_path), "%s/mock-ca.pem", + scratch_root); + FILE *fp = fopen(s->ca_pem_path, "w"); + if (!fp) { + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + PEM_write_X509(fp, cert); + fclose(fp); + + s->ssl_ctx = SSL_CTX_new(TLS_server_method()); + if (!s->ssl_ctx) { + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + SSL_CTX_set_min_proto_version(s->ssl_ctx, TLS1_2_VERSION); + if (SSL_CTX_use_certificate(s->ssl_ctx, cert) != 1 || + SSL_CTX_use_PrivateKey(s->ssl_ctx, pkey) != 1) { + SSL_CTX_free(s->ssl_ctx); + s->ssl_ctx = NULL; + X509_free(cert); + EVP_PKEY_free(pkey); + return -1; + } + X509_free(cert); + EVP_PKEY_free(pkey); + return 0; +} + +int oci_mock_server_start(oci_mock_server_t *s, const char *scratch_root) +{ + memset(s, 0, sizeof(*s)); + pthread_mutex_init(&s->lock, NULL); + if (mock_make_cert(s, scratch_root) < 0) { + pthread_mutex_destroy(&s->lock); + return -1; + } + 
s->listen_fd = socket(AF_INET, SOCK_STREAM, 0); + if (s->listen_fd < 0) + goto err; + int yes = 1; + setsockopt(s->listen_fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = 0, + }; + if (bind(s->listen_fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) + goto err_sock; + socklen_t slen = sizeof(sa); + if (getsockname(s->listen_fd, (struct sockaddr *) &sa, &slen) < 0) + goto err_sock; + s->port = ntohs(sa.sin_port); + if (listen(s->listen_fd, 8) < 0) + goto err_sock; + if (pthread_create(&s->thread, NULL, mock_server_loop, s) != 0) + goto err_sock; + return 0; +err_sock: + close(s->listen_fd); +err: + SSL_CTX_free(s->ssl_ctx); + pthread_mutex_destroy(&s->lock); + return -1; +} + +void oci_mock_server_stop(oci_mock_server_t *s) +{ + pthread_mutex_lock(&s->lock); + s->stop = true; + pthread_mutex_unlock(&s->lock); + int wake = socket(AF_INET, SOCK_STREAM, 0); + if (wake >= 0) { + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(s->port), + }; + (void) connect(wake, (struct sockaddr *) &sa, sizeof(sa)); + close(wake); + } + pthread_join(s->thread, NULL); + close(s->listen_fd); + SSL_CTX_free(s->ssl_ctx); + pthread_mutex_destroy(&s->lock); +} + +void oci_mock_set_handler(oci_mock_server_t *s, oci_mock_handler_t h, void *ctx) +{ + pthread_mutex_lock(&s->lock); + s->handler = h; + s->ctx = ctx; + s->n_requests = 0; + memset(s->log, 0, sizeof(s->log)); + pthread_mutex_unlock(&s->lock); +} + +int oci_mock_request_count(oci_mock_server_t *s) +{ + pthread_mutex_lock(&s->lock); + int n = s->n_requests; + pthread_mutex_unlock(&s->lock); + return n; +} + +void *oci_mock_handler_ctx(oci_mock_server_t *s) +{ + return s->ctx; +} + +void oci_mock_send_full(oci_mock_io_t *io, int status, const char *status_text, + const char *content_type, + const char *www_authenticate, + const char *docker_digest, + const void 
*body,
+                        size_t body_len)
+{
+    /* Format the whole header block with a single bounded snprintf. Chaining
+     * calls with `n += snprintf(header + n, sizeof(header) - n, ...)` is
+     * unsafe: a truncated call returns the would-be length, after which the
+     * size argument wraps around and the next call writes past the buffer.
+     * Absent optional headers expand to empty strings, so the bytes on the
+     * wire are the same as before whenever the header fits.
+     */
+    char header[1024];
+    int n = snprintf(header, sizeof(header),
+                     "HTTP/1.1 %d %s\r\n"
+                     "Content-Length: %zu\r\n"
+                     "%s%s%s"
+                     "%s%s%s"
+                     "%s%s%s"
+                     "\r\n",
+                     status, status_text ? status_text : "OK", body_len,
+                     content_type ? "Content-Type: " : "",
+                     content_type ? content_type : "",
+                     content_type ? "\r\n" : "",
+                     www_authenticate ? "Www-Authenticate: " : "",
+                     www_authenticate ? www_authenticate : "",
+                     www_authenticate ? "\r\n" : "",
+                     docker_digest ? "Docker-Content-Digest: " : "",
+                     docker_digest ? docker_digest : "",
+                     docker_digest ? "\r\n" : "");
+    if (n < 0 || (size_t) n >= sizeof(header)) {
+        /* Refuse to send a truncated header block. */
+        fprintf(stderr, "oci-mock: response header exceeds %zu bytes\n",
+                sizeof(header));
+        return;
+    }
+    oci_mock_io_write(io, header, (size_t) n);
+    if (body_len > 0)
+        oci_mock_io_write(io, body, body_len);
+}
+
+static int remove_entry(const char *path, const struct stat *st, int typeflag,
+                        struct FTW *ftwbuf)
+{
+    (void) st;
+    (void) typeflag;
+    (void) ftwbuf;
+    return remove(path);
+}
+
+void oci_mock_wipe_dir(const char *root)
+{
+    /* FTW_DEPTH walks children before parents so rmdir does not race against
+     * still-populated directories.
+     */
+    (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS);
+}
+
+char *oci_mock_make_scratch_root(const char *prefix)
+{
+    char buf[256];
+    int n = snprintf(buf, sizeof(buf), "/tmp/%s-XXXXXX",
+                     prefix && *prefix ? 
prefix : "elfuse-mock"); + if (n < 0 || (size_t) n >= sizeof(buf)) + return NULL; + if (!mkdtemp(buf)) + return NULL; + return strdup(buf); +} + +char *oci_mock_make_base_url(int port) +{ + char *url = malloc(64); + if (!url) + return NULL; + snprintf(url, 64, "https://127.0.0.1:%d", port); + return url; +} diff --git a/tests/lib/oci-mock.h b/tests/lib/oci-mock.h new file mode 100644 index 0000000..ecd6d94 --- /dev/null +++ b/tests/lib/oci-mock.h @@ -0,0 +1,129 @@ +/* Shared TLS-terminated HTTP mock server for OCI test suites + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Wraps a pthread-driven socket listener plus an OpenSSL session terminator on + * 127.0.0.1:<port>. Each accepted connection is handed to a user-supplied + * handler that reads the parsed oci_mock_request_t and writes a canned + * response via oci_mock_send_full. A fresh self-signed RSA certificate is + * generated at oci_mock_server_start time so callers can feed it to the fetcher + * as opts.ca_file and exercise a real TLS handshake. + * + * The mock predates this header (the original lived inline in + * tests/test-oci-fetch.c). It moved out so both the fetch and the pull suites + * can share the same scaffolding without duplicating ~400 LOC of OpenSSL + + * socket plumbing. Test-specific request handlers and assertion helpers stay + * in their respective .c files. + * + * Threading model: each oci_mock_server_t owns a single accept thread. Each + * connection is read, dispatched to the handler, and closed on that thread, so + * requests are served one at a time. oci_mock_set_handler resets the request + * log; oci_mock_request_count reports requests logged since that reset. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +/* IO abstraction: every handler reads and writes through an oci_mock_io_t so the + * underlying transport (an SSL session here) is swappable. 
+ */ +typedef struct { + SSL *ssl; +} oci_mock_io_t; + +typedef struct { + char method[8]; + char path[1024]; + char authorization[1024]; + char accept[1024]; +} oci_mock_request_t; + +#define OCI_MOCK_LOG_MAX 16 + +typedef struct oci_mock_server oci_mock_server_t; + +typedef void (*oci_mock_handler_t)(oci_mock_server_t *s, oci_mock_io_t *io, + const oci_mock_request_t *req); + +struct oci_mock_server { + int listen_fd; + int port; + pthread_t thread; + pthread_mutex_t lock; + bool stop; + int n_requests; + oci_mock_request_t log[OCI_MOCK_LOG_MAX]; + oci_mock_handler_t handler; + void *ctx; + SSL_CTX *ssl_ctx; + char ca_pem_path[256]; +}; + +/* Start the mock server. scratch_root is a writable directory used as the + * destination for the generated self-signed certificate PEM (path captured in + * s->ca_pem_path). Returns 0 on success and -1 on socket / TLS / pthread + * failure with errno preserved. + */ +int oci_mock_server_start(oci_mock_server_t *s, const char *scratch_root); + +/* Stop the server. Joins the accept thread, frees the SSL_CTX, and closes the + * listening socket. Safe to call once on a successfully started server. + */ +void oci_mock_server_stop(oci_mock_server_t *s); + +/* Install a request handler and reset the request log. The handler runs once + * per accepted connection inside the server's accept thread. + */ +void oci_mock_set_handler(oci_mock_server_t *s, oci_mock_handler_t h, + void *ctx); + +/* Returns the number of requests captured in the log since the last + * mock_set_handler. The handler may receive more than OCI_MOCK_LOG_MAX + * requests but the count is clamped to the log capacity. + */ +int oci_mock_request_count(oci_mock_server_t *s); + +/* Per-connection ctx accessor used by handlers. Equivalent to s->ctx but + * documents the intent in handler bodies. + */ +void *oci_mock_handler_ctx(oci_mock_server_t *s); + +/* Read from / write to the TLS session. 
Handlers normally use oci_mock_send_full + * for canned replies; oci_mock_io_read / oci_mock_io_write cover custom flows. + */ +ssize_t oci_mock_io_read(oci_mock_io_t *io, void *buf, size_t cap); +void oci_mock_io_write(oci_mock_io_t *io, const void *buf, size_t n); + +/* Compose and send a complete HTTP/1.1 response. status_text defaults to "OK" + * when NULL. content_type / www_authenticate / docker_digest are added to the + * header block only when non-NULL. body may be NULL when body_len is 0. + */ +void oci_mock_send_full(oci_mock_io_t *io, int status, const char *status_text, + const char *content_type, + const char *www_authenticate, + const char *docker_digest, + const void *body, + size_t body_len); + +/* Recursively wipe a directory tree (depth-first remove). Convenience for + * tests that mkdtemp a scratch root and clean it up on exit. + */ +void oci_mock_wipe_dir(const char *root); + +/* mkdtemp helper: create /tmp/<prefix>-XXXXXX (prefix falls back to + * "elfuse-mock" when NULL or empty) and return a heap-allocated path. Returns + * NULL on failure. + */ +char *oci_mock_make_scratch_root(const char *prefix); + +/* Build "https://127.0.0.1:<port>" into a heap-allocated string for the + * fetcher's base_url_override option. Returns NULL on oom. + */ +char *oci_mock_make_base_url(int port); diff --git a/tests/test-oci-fetch.c b/tests/test-oci-fetch.c index 61edbec..d2531d9 100644 --- a/tests/test-oci-fetch.c +++ b/tests/test-oci-fetch.c @@ -24,28 +24,19 @@ * make test-oci-fetch-online and is not part of make check. 
*/ -#include #include -#include -#include -#include -#include #include #include #include #include #include -#include -#include #include #include -#include #include #include #include #include -#include #include "oci/blob-store.h" #include "oci/digest.h" @@ -53,6 +44,8 @@ #include "oci/manifest.h" #include "oci/ref.h" +#include "lib/oci-mock.h" + #define GREEN "\033[0;32m" #define RED "\033[0;31m" #define RESET "\033[0m" @@ -84,393 +77,9 @@ static void report_fail(const char *name, const char *fmt, ...) printf("\n"); } -/* IO abstraction: every handler reads and writes through an io_t so the - * underlying transport (an SSL session here) is swappable. - */ -typedef struct { - SSL *ssl; -} io_t; - -static ssize_t io_read(io_t *io, void *buf, size_t cap) -{ - int n = SSL_read(io->ssl, buf, (int) cap); - return n > 0 ? (ssize_t) n : -1; -} - -static void io_write(io_t *io, const void *buf, size_t n) -{ - const char *p = buf; - size_t left = n; - while (left) { - int w = SSL_write(io->ssl, p, (int) left); - if (w <= 0) - return; - p += w; - left -= (size_t) w; - } -} - -/* ── Mock HTTP server ────────────────────────────────────────────── */ - -typedef struct { - char method[8]; - char path[1024]; - char authorization[1024]; - char accept[1024]; -} mock_request_t; - -#define MOCK_LOG_MAX 16 - -typedef struct mock_server mock_server_t; -typedef void (*mock_handler_t)(mock_server_t *s, io_t *io, - const mock_request_t *req); - -struct mock_server { - int listen_fd; - int port; - pthread_t thread; - pthread_mutex_t lock; - bool stop; - int n_requests; - mock_request_t log[MOCK_LOG_MAX]; - mock_handler_t handler; - void *ctx; - SSL_CTX *ssl_ctx; - char ca_pem_path[256]; -}; - -static ssize_t read_request_until_empty(io_t *io, char *buf, size_t cap) -{ - size_t off = 0; - while (off + 1 < cap) { - ssize_t n = io_read(io, buf + off, cap - 1 - off); - if (n <= 0) - break; - off += (size_t) n; - buf[off] = '\0'; - if (strstr(buf, "\r\n\r\n")) - break; - } - return (ssize_t) off; 
-} - -static void parse_request(const char *raw, mock_request_t *out) -{ - memset(out, 0, sizeof(*out)); - const char *sp1 = strchr(raw, ' '); - if (!sp1) - return; - size_t mlen = (size_t) (sp1 - raw); - if (mlen >= sizeof(out->method)) - mlen = sizeof(out->method) - 1; - memcpy(out->method, raw, mlen); - const char *sp2 = strchr(sp1 + 1, ' '); - if (!sp2) - return; - size_t plen = (size_t) (sp2 - sp1 - 1); - if (plen >= sizeof(out->path)) - plen = sizeof(out->path) - 1; - memcpy(out->path, sp1 + 1, plen); - - const char *line = strstr(raw, "\r\n"); - if (!line) - return; - line += 2; - while (*line && strncmp(line, "\r\n", 2) != 0) { - const char *eol = strstr(line, "\r\n"); - if (!eol) - break; - size_t llen = (size_t) (eol - line); - if (llen > 13 && !strncasecmp(line, "Authorization:", 14)) { - const char *v = line + 14; - while (*v == ' ') - v++; - size_t vlen = (size_t) (eol - v); - if (vlen >= sizeof(out->authorization)) - vlen = sizeof(out->authorization) - 1; - memcpy(out->authorization, v, vlen); - out->authorization[vlen] = '\0'; - } else if (llen > 6 && !strncasecmp(line, "Accept:", 7)) { - const char *v = line + 7; - while (*v == ' ') - v++; - size_t vlen = (size_t) (eol - v); - if (vlen >= sizeof(out->accept)) - vlen = sizeof(out->accept) - 1; - memcpy(out->accept, v, vlen); - out->accept[vlen] = '\0'; - } - line = eol + 2; - } -} - -static void *mock_server_loop(void *arg) -{ - mock_server_t *s = arg; - while (1) { - pthread_mutex_lock(&s->lock); - bool stop = s->stop; - pthread_mutex_unlock(&s->lock); - if (stop) - break; - int cfd = accept(s->listen_fd, NULL, NULL); - if (cfd < 0) { - if (errno == EINTR) - continue; - break; - } - SSL *ssl = SSL_new(s->ssl_ctx); - if (!ssl) { - close(cfd); - continue; - } - SSL_set_fd(ssl, cfd); - if (SSL_accept(ssl) <= 0) { - /* Negative-trust tests deliberately abort the handshake; just - * recycle the socket and let the request log stay empty so the - * caller can assert n_requests == 0. 
- */ - SSL_free(ssl); - close(cfd); - continue; - } - io_t io = {.ssl = ssl}; - char buf[8192]; - ssize_t got = read_request_until_empty(&io, buf, sizeof(buf)); - if (got <= 0) { - SSL_shutdown(ssl); - SSL_free(ssl); - close(cfd); - continue; - } - mock_request_t req; - parse_request(buf, &req); - - pthread_mutex_lock(&s->lock); - if (s->n_requests < MOCK_LOG_MAX) { - s->log[s->n_requests++] = req; - } - mock_handler_t h = s->handler; - pthread_mutex_unlock(&s->lock); - - if (h) - h(s, &io, &req); - SSL_shutdown(ssl); - SSL_free(ssl); - close(cfd); - } - return NULL; -} - -/* Generate an in-memory RSA keypair + self-signed cert valid for one day, - * covering CN=127.0.0.1 plus SAN IP:127.0.0.1 and DNS:localhost. Writes the - * certificate (PEM) to s->ca_pem_path for the fetcher to consume as - * opts.ca_file. +/* Mock server infrastructure lives in tests/lib/oci-mock.{c,h}. This file now + * only carries the test-specific handlers and assertions. */ -static int mock_make_cert(mock_server_t *s, const char *scratch_root) -{ - EVP_PKEY *pkey = EVP_RSA_gen(2048); - if (!pkey) - return -1; - X509 *cert = X509_new(); - if (!cert) { - EVP_PKEY_free(pkey); - return -1; - } - X509_set_version(cert, 2); - ASN1_INTEGER_set(X509_get_serialNumber(cert), 1); - X509_gmtime_adj(X509_get_notBefore(cert), 0); - X509_gmtime_adj(X509_get_notAfter(cert), 60 * 60 * 24); - X509_set_pubkey(cert, pkey); - X509_NAME *name = X509_get_subject_name(cert); - X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, - (const unsigned char *) "127.0.0.1", -1, -1, 0); - X509_set_issuer_name(cert, name); - - X509V3_CTX vctx; - X509V3_set_ctx_nodb(&vctx); - X509V3_set_ctx(&vctx, cert, cert, NULL, NULL, 0); - X509_EXTENSION *ext = X509V3_EXT_conf_nid(NULL, &vctx, - NID_subject_alt_name, - "IP:127.0.0.1, DNS:localhost"); - if (ext) { - X509_add_ext(cert, ext, -1); - X509_EXTENSION_free(ext); - } - if (!X509_sign(cert, pkey, EVP_sha256())) { - X509_free(cert); - EVP_PKEY_free(pkey); - return -1; - } - - 
snprintf(s->ca_pem_path, sizeof(s->ca_pem_path), "%s/mock-ca.pem", - scratch_root); - FILE *fp = fopen(s->ca_pem_path, "w"); - if (!fp) { - X509_free(cert); - EVP_PKEY_free(pkey); - return -1; - } - PEM_write_X509(fp, cert); - fclose(fp); - - s->ssl_ctx = SSL_CTX_new(TLS_server_method()); - if (!s->ssl_ctx) { - X509_free(cert); - EVP_PKEY_free(pkey); - return -1; - } - SSL_CTX_set_min_proto_version(s->ssl_ctx, TLS1_2_VERSION); - if (SSL_CTX_use_certificate(s->ssl_ctx, cert) != 1 || - SSL_CTX_use_PrivateKey(s->ssl_ctx, pkey) != 1) { - SSL_CTX_free(s->ssl_ctx); - s->ssl_ctx = NULL; - X509_free(cert); - EVP_PKEY_free(pkey); - return -1; - } - X509_free(cert); - EVP_PKEY_free(pkey); - return 0; -} - -static int mock_server_start(mock_server_t *s, const char *scratch_root) -{ - memset(s, 0, sizeof(*s)); - pthread_mutex_init(&s->lock, NULL); - if (mock_make_cert(s, scratch_root) < 0) { - pthread_mutex_destroy(&s->lock); - return -1; - } - s->listen_fd = socket(AF_INET, SOCK_STREAM, 0); - if (s->listen_fd < 0) - goto err; - int yes = 1; - setsockopt(s->listen_fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); - struct sockaddr_in sa = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = 0, - }; - if (bind(s->listen_fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) - goto err_sock; - socklen_t slen = sizeof(sa); - if (getsockname(s->listen_fd, (struct sockaddr *) &sa, &slen) < 0) - goto err_sock; - s->port = ntohs(sa.sin_port); - if (listen(s->listen_fd, 8) < 0) - goto err_sock; - if (pthread_create(&s->thread, NULL, mock_server_loop, s) != 0) - goto err_sock; - return 0; -err_sock: - close(s->listen_fd); -err: - SSL_CTX_free(s->ssl_ctx); - pthread_mutex_destroy(&s->lock); - return -1; -} - -static void mock_server_stop(mock_server_t *s) -{ - pthread_mutex_lock(&s->lock); - s->stop = true; - pthread_mutex_unlock(&s->lock); - int wake = socket(AF_INET, SOCK_STREAM, 0); - if (wake >= 0) { - struct sockaddr_in sa = { - .sin_family = AF_INET, - 
.sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(s->port), - }; - (void) connect(wake, (struct sockaddr *) &sa, sizeof(sa)); - close(wake); - } - pthread_join(s->thread, NULL); - close(s->listen_fd); - SSL_CTX_free(s->ssl_ctx); - pthread_mutex_destroy(&s->lock); -} - -static void mock_set_handler(mock_server_t *s, mock_handler_t h, void *ctx) -{ - pthread_mutex_lock(&s->lock); - s->handler = h; - s->ctx = ctx; - s->n_requests = 0; - memset(s->log, 0, sizeof(s->log)); - pthread_mutex_unlock(&s->lock); -} - -static int mock_request_count(mock_server_t *s) -{ - pthread_mutex_lock(&s->lock); - int n = s->n_requests; - pthread_mutex_unlock(&s->lock); - return n; -} - -static void mock_send_full(io_t *io, int status, const char *status_text, - const char *content_type, - const char *www_authenticate, - const char *docker_digest, - const void *body, - size_t body_len) -{ - char header[1024]; - int n = snprintf(header, sizeof(header), - "HTTP/1.1 %d %s\r\n" - "Content-Length: %zu\r\n", - status, status_text ? 
status_text : "OK", body_len); - if (content_type) - n += snprintf(header + n, sizeof(header) - (size_t) n, - "Content-Type: %s\r\n", content_type); - if (www_authenticate) - n += snprintf(header + n, sizeof(header) - (size_t) n, - "Www-Authenticate: %s\r\n", www_authenticate); - if (docker_digest) - n += snprintf(header + n, sizeof(header) - (size_t) n, - "Docker-Content-Digest: %s\r\n", docker_digest); - n += snprintf(header + n, sizeof(header) - (size_t) n, "\r\n"); - io_write(io, header, (size_t) n); - if (body_len > 0) - io_write(io, body, body_len); -} - -/* ── Helpers ─────────────────────────────────────────────────────── */ - -static int remove_entry(const char *path, const struct stat *st, int typeflag, - struct FTW *ftwbuf) -{ - (void) st; - (void) typeflag; - (void) ftwbuf; - return remove(path); -} - -static void wipe_dir(const char *root) -{ - (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS); -} - -static char *make_scratch_root(void) -{ - char *tmpl = strdup("/tmp/elfuse-oci-fetch-XXXXXX"); - if (!tmpl || !mkdtemp(tmpl)) { - free(tmpl); - return NULL; - } - return tmpl; -} - -static char *make_base_url(int port) -{ - char *url = malloc(64); - if (!url) - return NULL; - snprintf(url, 64, "https://127.0.0.1:%d", port); - return url; -} static void fill_descriptor(oci_descriptor_t *desc, char *digest_str_buf, size_t digest_str_cap, @@ -497,16 +106,16 @@ typedef struct { const char *docker_digest; } handler_anonymous_manifest_t; -static void h_anonymous_manifest(mock_server_t *s, io_t *io, - const mock_request_t *req) +static void h_anonymous_manifest(oci_mock_server_t *s, oci_mock_io_t *io, + const oci_mock_request_t *req) { handler_anonymous_manifest_t *ctx = s->ctx; if (strcmp(req->path, ctx->manifest_path) == 0) { - mock_send_full(io, 200, "OK", ctx->content_type, NULL, ctx->docker_digest, + oci_mock_send_full(io, 200, "OK", ctx->content_type, NULL, ctx->docker_digest, ctx->body, ctx->body_len); return; } - mock_send_full(io, 404, "Not 
Found", "text/plain", NULL, NULL, "nope", 4); + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); } typedef struct { @@ -518,7 +127,7 @@ typedef struct { char base_url[64]; } handler_bearer_t; -static void h_bearer_flow(mock_server_t *s, io_t *io, const mock_request_t *req) +static void h_bearer_flow(oci_mock_server_t *s, oci_mock_io_t *io, const oci_mock_request_t *req) { handler_bearer_t *ctx = s->ctx; if (strncmp(req->path, "/token", 6) == 0) { @@ -526,7 +135,7 @@ static void h_bearer_flow(mock_server_t *s, io_t *io, const mock_request_t *req) int n = snprintf(body, sizeof(body), "{\"token\":\"%s\",\"expires_in\":300}", ctx->expected_token); - mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, + oci_mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, (size_t) n); return; } @@ -534,7 +143,7 @@ static void h_bearer_flow(mock_server_t *s, io_t *io, const mock_request_t *req) char want_auth[256]; snprintf(want_auth, sizeof(want_auth), "Bearer %s", ctx->expected_token); if (strcmp(req->authorization, want_auth) == 0) { - mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, + oci_mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, ctx->manifest_body, ctx->manifest_body_len); return; } @@ -543,11 +152,11 @@ static void h_bearer_flow(mock_server_t *s, io_t *io, const mock_request_t *req) "Bearer realm=\"%s/token\",service=\"reg\"," "scope=\"repository:private/secret:pull\"", ctx->base_url); - mock_send_full(io, 401, "Unauthorized", "application/json", challenge, + oci_mock_send_full(io, 401, "Unauthorized", "application/json", challenge, NULL, "{}", 2); return; } - mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); } typedef struct { @@ -558,16 +167,16 @@ typedef struct { bool oversize; /* if true, send body_len + 5 bytes */ } handler_blob_t; -static void h_blob(mock_server_t *s, io_t *io, 
const mock_request_t *req) +static void h_blob(oci_mock_server_t *s, oci_mock_io_t *io, const oci_mock_request_t *req) { handler_blob_t *ctx = s->ctx; if (strcmp(req->path, ctx->blob_path) != 0) { - mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); return; } int status = ctx->status ? ctx->status : 200; if (status != 200) { - mock_send_full(io, status, "Error", "text/plain", NULL, NULL, "err", 3); + oci_mock_send_full(io, status, "Error", "text/plain", NULL, NULL, "err", 3); return; } if (ctx->oversize) { @@ -575,12 +184,12 @@ static void h_blob(mock_server_t *s, io_t *io, const mock_request_t *req) char *buf = malloc(pad_len); memcpy(buf, ctx->body, ctx->body_len); memset(buf + ctx->body_len, 'X', 5); - mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, + oci_mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, buf, pad_len); free(buf); return; } - mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, + oci_mock_send_full(io, 200, "OK", "application/octet-stream", NULL, NULL, ctx->body, ctx->body_len); } @@ -592,20 +201,20 @@ typedef struct { const char *content_type; } handler_basic_auth_t; -static void h_basic_auth(mock_server_t *s, io_t *io, - const mock_request_t *req) +static void h_basic_auth(oci_mock_server_t *s, oci_mock_io_t *io, + const oci_mock_request_t *req) { handler_basic_auth_t *ctx = s->ctx; if (strcmp(req->path, ctx->manifest_path) != 0) { - mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); return; } if (strcmp(req->authorization, ctx->expected_authorization) != 0) { - mock_send_full(io, 401, "Unauthorized", "application/json", + oci_mock_send_full(io, 401, "Unauthorized", "application/json", "Basic realm=\"reg\"", NULL, "{}", 2); return; } - mock_send_full(io, 200, "OK", 
ctx->content_type, NULL, NULL, + oci_mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, ctx->body, ctx->body_len); } @@ -619,13 +228,13 @@ typedef struct { char base_url[64]; } handler_basic_then_bearer_t; -static void h_basic_then_bearer(mock_server_t *s, io_t *io, - const mock_request_t *req) +static void h_basic_then_bearer(oci_mock_server_t *s, oci_mock_io_t *io, + const oci_mock_request_t *req) { handler_basic_then_bearer_t *ctx = s->ctx; if (strncmp(req->path, "/token", 6) == 0) { if (strcmp(req->authorization, ctx->expected_basic) != 0) { - mock_send_full(io, 401, "Unauthorized", "application/json", NULL, + oci_mock_send_full(io, 401, "Unauthorized", "application/json", NULL, NULL, "{}", 2); return; } @@ -633,7 +242,7 @@ static void h_basic_then_bearer(mock_server_t *s, io_t *io, int n = snprintf(body, sizeof(body), "{\"token\":\"%s\",\"expires_in\":300}", ctx->expected_token); - mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, + oci_mock_send_full(io, 200, "OK", "application/json", NULL, NULL, body, (size_t) n); return; } @@ -642,7 +251,7 @@ static void h_basic_then_bearer(mock_server_t *s, io_t *io, snprintf(want_bearer, sizeof(want_bearer), "Bearer %s", ctx->expected_token); if (strcmp(req->authorization, want_bearer) == 0) { - mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, + oci_mock_send_full(io, 200, "OK", ctx->content_type, NULL, NULL, ctx->manifest_body, ctx->manifest_body_len); return; } @@ -651,16 +260,16 @@ static void h_basic_then_bearer(mock_server_t *s, io_t *io, "Bearer realm=\"%s/token\",service=\"reg\"," "scope=\"repository:private/secret:pull\"", ctx->base_url); - mock_send_full(io, 401, "Unauthorized", "application/json", challenge, + oci_mock_send_full(io, 401, "Unauthorized", "application/json", challenge, NULL, "{}", 2); return; } - mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, "nope", 4); } /* ── 
Tests ───────────────────────────────────────────────────────── */ -static void test_anonymous_manifest(mock_server_t *server, oci_fetcher_t *f) +static void test_anonymous_manifest(oci_mock_server_t *server, oci_fetcher_t *f) { static const char BODY[] = "{\"schemaVersion\":2}"; static const char DIGEST[] = @@ -672,7 +281,7 @@ static void test_anonymous_manifest(mock_server_t *server, oci_fetcher_t *f) .content_type = "application/vnd.oci.image.manifest.v1+json", .docker_digest = DIGEST, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -706,7 +315,7 @@ static void test_anonymous_manifest(mock_server_t *server, oci_fetcher_t *f) oci_fetch_response_free(&resp); } -static void test_manifest_404(mock_server_t *server, oci_fetcher_t *f) +static void test_manifest_404(oci_mock_server_t *server, oci_fetcher_t *f) { handler_anonymous_manifest_t ctx = { .manifest_path = "/v2/library/missing/manifests/v9", @@ -715,7 +324,7 @@ static void test_manifest_404(mock_server_t *server, oci_fetcher_t *f) .content_type = "application/json", .docker_digest = NULL, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -736,10 +345,10 @@ static void test_manifest_404(mock_server_t *server, oci_fetcher_t *f) oci_fetch_response_free(&resp); } -static void test_bearer_challenge(mock_server_t *server, oci_fetcher_t *f, +static void test_bearer_challenge(oci_mock_server_t *server, oci_fetcher_t *f, handler_bearer_t *ctx) { - mock_set_handler(server, h_bearer_flow, ctx); + oci_mock_set_handler(server, h_bearer_flow, ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -773,7 +382,7 @@ static void test_bearer_challenge(mock_server_t *server, oci_fetcher_t *f, oci_fetch_response_free(&resp); } -static void test_token_reuse(mock_server_t *server, 
oci_fetcher_t *f) +static void test_token_reuse(oci_mock_server_t *server, oci_fetcher_t *f) { int before = server->n_requests; oci_ref_t ref = { @@ -805,7 +414,7 @@ static const char HELLO_WORLD[] = "hello world"; static const char HELLO_WORLD_SHA256[] = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"; -static void test_blob_success(mock_server_t *server, oci_fetcher_t *f, +static void test_blob_success(oci_mock_server_t *server, oci_fetcher_t *f, const char *store_root) { oci_blob_store_t *store = oci_blob_store_open(store_root); @@ -821,7 +430,7 @@ static void test_blob_success(mock_server_t *server, oci_fetcher_t *f, .body = HELLO_WORLD, .body_len = strlen(HELLO_WORLD), }; - mock_set_handler(server, h_blob, &ctx); + oci_mock_set_handler(server, h_blob, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -849,7 +458,7 @@ static void test_blob_success(mock_server_t *server, oci_fetcher_t *f, oci_blob_store_close(store); } -static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, +static void test_blob_already_cached(oci_mock_server_t *server, oci_fetcher_t *f, const char *store_root) { oci_blob_store_t *store = oci_blob_store_open(store_root); @@ -870,7 +479,7 @@ static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, .body = "x", .body_len = 1, }; - mock_set_handler(server, h_blob, &ctx); + oci_mock_set_handler(server, h_blob, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -896,7 +505,7 @@ static void test_blob_already_cached(mock_server_t *server, oci_fetcher_t *f, oci_blob_store_close(store); } -static void test_blob_size_mismatch(mock_server_t *server, oci_fetcher_t *f, +static void test_blob_size_mismatch(oci_mock_server_t *server, oci_fetcher_t *f, const char *store_root) { oci_blob_store_t *store = oci_blob_store_open(store_root); @@ -911,7 +520,7 @@ static void test_blob_size_mismatch(mock_server_t *server, oci_fetcher_t *f, .body_len = strlen(HELLO_WORLD), .oversize = true, }; 
- mock_set_handler(server, h_blob, &ctx); + oci_mock_set_handler(server, h_blob, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -937,7 +546,7 @@ static void test_blob_size_mismatch(mock_server_t *server, oci_fetcher_t *f, oci_blob_store_close(store); } -static void test_blob_digest_mismatch(mock_server_t *server, oci_fetcher_t *f, +static void test_blob_digest_mismatch(oci_mock_server_t *server, oci_fetcher_t *f, const char *store_root) { static const char WRONG_HEX[] = @@ -955,7 +564,7 @@ static void test_blob_digest_mismatch(mock_server_t *server, oci_fetcher_t *f, .body = HELLO_WORLD, .body_len = strlen(HELLO_WORLD), }; - mock_set_handler(server, h_blob, &ctx); + oci_mock_set_handler(server, h_blob, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -980,7 +589,7 @@ static void test_blob_digest_mismatch(mock_server_t *server, oci_fetcher_t *f, oci_blob_store_close(store); } -static void test_blob_404(mock_server_t *server, oci_fetcher_t *f, +static void test_blob_404(oci_mock_server_t *server, oci_fetcher_t *f, const char *store_root) { oci_blob_store_t *store = oci_blob_store_open(store_root); @@ -989,7 +598,7 @@ static void test_blob_404(mock_server_t *server, oci_fetcher_t *f, .body = "x", .body_len = 1, }; - mock_set_handler(server, h_blob, &ctx); + oci_mock_set_handler(server, h_blob, &ctx); oci_ref_t ref = { .registry = "127.0.0.1:fake", @@ -1015,7 +624,7 @@ static void test_blob_404(mock_server_t *server, oci_fetcher_t *f, /* ── Slice 4b cases ──────────────────────────────────────────────── */ -static void test_basic_auth_success(mock_server_t *server, const char *base_url, +static void test_basic_auth_success(oci_mock_server_t *server, const char *base_url, const char *ca_pem) { /* alice:secret encoded as base64. 
*/ @@ -1026,7 +635,7 @@ static void test_basic_auth_success(mock_server_t *server, const char *base_url, .body_len = strlen("{\"schemaVersion\":2}"), .content_type = "application/vnd.oci.image.manifest.v1+json", }; - mock_set_handler(server, h_basic_auth, &ctx); + oci_mock_set_handler(server, h_basic_auth, &ctx); oci_fetcher_options_t opts = { .base_url_override = base_url, @@ -1053,9 +662,9 @@ static void test_basic_auth_success(mock_server_t *server, const char *base_url, } else if (resp.http_status != 200) { report_fail("basic auth: server accepts credentials", "status=%ld", resp.http_status); - } else if (mock_request_count(server) != 1) { + } else if (oci_mock_request_count(server) != 1) { report_fail("basic auth: server accepts credentials", - "expected 1 request, got %d", mock_request_count(server)); + "expected 1 request, got %d", oci_mock_request_count(server)); } else if (strcmp(server->log[0].authorization, "Basic YWxpY2U6c2VjcmV0") != 0) { report_fail("basic auth: server accepts credentials", @@ -1067,7 +676,7 @@ static void test_basic_auth_success(mock_server_t *server, const char *base_url, oci_fetcher_free(f); } -static void test_basic_then_bearer(mock_server_t *server, const char *base_url, +static void test_basic_then_bearer(oci_mock_server_t *server, const char *base_url, const char *ca_pem) { static const char BODY[] = "{\"schemaVersion\":2,\"mixed\":true}"; @@ -1080,7 +689,7 @@ static void test_basic_then_bearer(mock_server_t *server, const char *base_url, .content_type = "application/vnd.oci.image.manifest.v1+json", }; snprintf(ctx.base_url, sizeof(ctx.base_url), "%s", base_url); - mock_set_handler(server, h_basic_then_bearer, &ctx); + oci_mock_set_handler(server, h_basic_then_bearer, &ctx); oci_fetcher_options_t opts = { .base_url_override = base_url, @@ -1132,7 +741,7 @@ static void test_basic_then_bearer(mock_server_t *server, const char *base_url, oci_fetcher_free(f); } -static void test_insecure_loopback_allowed(mock_server_t *server, 
+static void test_insecure_loopback_allowed(oci_mock_server_t *server, const char *base_url) { static const char BODY[] = "{\"schemaVersion\":2}"; @@ -1143,7 +752,7 @@ static void test_insecure_loopback_allowed(mock_server_t *server, .content_type = "application/vnd.oci.image.manifest.v1+json", .docker_digest = NULL, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); /* No ca_file: verification is suppressed via allow_insecure. The loopback * registry host (127.0.0.1) is on the whitelist so policy lets the request @@ -1173,9 +782,9 @@ static void test_insecure_loopback_allowed(mock_server_t *server, } else if (resp.http_status != 200) { report_fail("insecure: loopback host bypasses TLS verify", "status=%ld", resp.http_status); - } else if (mock_request_count(server) != 1) { + } else if (oci_mock_request_count(server) != 1) { report_fail("insecure: loopback host bypasses TLS verify", - "expected 1 request, got %d", mock_request_count(server)); + "expected 1 request, got %d", oci_mock_request_count(server)); } else { report_pass("insecure: loopback host bypasses TLS verify"); } @@ -1183,7 +792,7 @@ static void test_insecure_loopback_allowed(mock_server_t *server, oci_fetcher_free(f); } -static void test_insecure_non_loopback_rejected(mock_server_t *server, +static void test_insecure_non_loopback_rejected(oci_mock_server_t *server, const char *base_url, const char *ca_pem) { @@ -1198,7 +807,7 @@ static void test_insecure_non_loopback_rejected(mock_server_t *server, .content_type = "application/json", .docker_digest = NULL, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); oci_fetcher_options_t opts = { .base_url_override = base_url, @@ -1225,9 +834,9 @@ static void test_insecure_non_loopback_rejected(mock_server_t *server, } else if (saved_errno != EPERM) { report_fail("insecure: non-loopback host rejected", "errno=%d (%s)", 
saved_errno, strerror(saved_errno)); - } else if (mock_request_count(server) != 0) { + } else if (oci_mock_request_count(server) != 0) { report_fail("insecure: non-loopback host rejected", - "%d request(s) leaked to server", mock_request_count(server)); + "%d request(s) leaked to server", oci_mock_request_count(server)); } else { report_pass("insecure: non-loopback host rejected"); } @@ -1235,7 +844,7 @@ static void test_insecure_non_loopback_rejected(mock_server_t *server, oci_fetcher_free(f); } -static void test_ca_file_missing_rejected(mock_server_t *server, +static void test_ca_file_missing_rejected(oci_mock_server_t *server, const char *base_url) { /* No ca_file at all: the mock's self-signed certificate cannot be @@ -1249,7 +858,7 @@ static void test_ca_file_missing_rejected(mock_server_t *server, .content_type = "application/json", .docker_digest = NULL, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); oci_fetcher_options_t opts = {.base_url_override = base_url}; oci_fetcher_t *f = oci_fetcher_new(&opts); @@ -1280,7 +889,7 @@ static void test_ca_file_missing_rejected(mock_server_t *server, oci_fetcher_free(f); } -static void test_ca_file_wrong_rejected(mock_server_t *server, +static void test_ca_file_wrong_rejected(oci_mock_server_t *server, const char *base_url, const char *scratch_root) { @@ -1320,7 +929,7 @@ static void test_ca_file_wrong_rejected(mock_server_t *server, .content_type = "application/json", .docker_digest = NULL, }; - mock_set_handler(server, h_anonymous_manifest, &ctx); + oci_mock_set_handler(server, h_anonymous_manifest, &ctx); oci_fetcher_options_t opts = { .base_url_override = base_url, @@ -1414,23 +1023,23 @@ int main(void) OpenSSL_add_all_algorithms(); SSL_load_error_strings(); - char *scratch = make_scratch_root(); + char *scratch = oci_mock_make_scratch_root("elfuse-oci-fetch"); if (!scratch) { fprintf(stderr, "mkdtemp failed: %s\n", strerror(errno)); return 1; } 
- mock_server_t server; - if (mock_server_start(&server, scratch) != 0) { + oci_mock_server_t server; + if (oci_mock_server_start(&server, scratch) != 0) { fprintf(stderr, "mock server start failed: %s\n", strerror(errno)); - wipe_dir(scratch); + oci_mock_wipe_dir(scratch); free(scratch); return 1; } - char *base_url = make_base_url(server.port); + char *base_url = oci_mock_make_base_url(server.port); if (!base_url) { fprintf(stderr, "oom on base url\n"); - mock_server_stop(&server); - wipe_dir(scratch); + oci_mock_server_stop(&server); + oci_mock_wipe_dir(scratch); free(scratch); return 1; } @@ -1446,8 +1055,8 @@ int main(void) if (!f) { fprintf(stderr, "oci_fetcher_new failed\n"); free(base_url); - mock_server_stop(&server); - wipe_dir(scratch); + oci_mock_server_stop(&server); + oci_mock_wipe_dir(scratch); free(scratch); return 1; } @@ -1507,14 +1116,14 @@ int main(void) test_ca_file_wrong_rejected(&server, base_url, scratch); free(base_url); - mock_server_stop(&server); + oci_mock_server_stop(&server); if (getenv("OCI_FETCH_ONLINE")) { printf("oci_fetch (online docker.io)\n"); test_online_dockerhub(); } - wipe_dir(scratch); + oci_mock_wipe_dir(scratch); free(scratch); printf("\nResults: %d/%d passed\n", g_passed, g_total); diff --git a/tests/test-oci-pull.c b/tests/test-oci-pull.c new file mode 100644 index 0000000..44c217c --- /dev/null +++ b/tests/test-oci-pull.c @@ -0,0 +1,772 @@ +/* elfuse oci pull pipeline unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Drives end-to-end pulls against the shared TLS mock server (tests/lib/ + * oci-mock). 
Each case scripts a router that maps URI -> canned response and + * runs oci_pull, then inspects the resulting blob store and pin file to + * verify: + * + * - tag -> index -> linux/arm64 sub-manifest -> config + layers, with pin + * - tag -> direct manifest (no index) -> config + layers, with pin + * - digest-only ref -> manifest -> config + layers, no pin + * - re-pull short-circuits: no extra blob downloads + * - body digest mismatching Docker-Content-Digest aborts the pull + * - index without linux/arm64 aborts the pull + * + * Manifest, index, and config JSON are generated at runtime so the embedded + * digests stay consistent with the actual bytes the mock will serve. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "oci/blob-store.h" +#include "oci/digest.h" +#include "oci/fetch.h" +#include "oci/manifest.h" +#include "oci/pull.h" +#include "oci/ref.h" +#include "oci/store.h" + +#include "lib/oci-mock.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int g_total = 0; +static int g_passed = 0; + +static void report_pass(const char *name) +{ + g_total++; + g_passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static void report_fail(const char *name, const char *fmt, ...) +{ + g_total++; + printf(" " RED "FAIL" RESET " %s", name); + if (fmt && *fmt) { + printf(": "); + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + } + printf("\n"); +} + +/* ── Synthetic image generator ───────────────────────────────────── */ + +/* Caller-owned bytes; populated by build_image. layer_bodies stay alive for + * the lifetime of the image_t. 
+ */ +typedef struct { + char *config_json; + size_t config_len; + char config_hex[OCI_DIGEST_HEX_MAX + 1]; + + char *layer_bodies[3]; + size_t layer_lens[3]; + char layer_hex[3][OCI_DIGEST_HEX_MAX + 1]; + size_t nlayers; + + char *manifest_json; + size_t manifest_len; + char manifest_hex[OCI_DIGEST_HEX_MAX + 1]; + + char *index_json; + size_t index_len; + char index_hex[OCI_DIGEST_HEX_MAX + 1]; +} image_t; + +static char *xstrdup_with_len(const char *s, size_t *out_len) +{ + char *r = strdup(s); + *out_len = strlen(s); + return r; +} + +static char *vformat(size_t *out_len, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static char *vformat(size_t *out_len, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + if (n < 0) + return NULL; + char *r = malloc((size_t) n + 1); + if (!r) + return NULL; + va_start(ap, fmt); + vsnprintf(r, (size_t) n + 1, fmt, ap); + va_end(ap); + *out_len = (size_t) n; + return r; +} + +static void hash_bytes(const void *buf, size_t len, char *out_hex) +{ + oci_digest_bytes(OCI_DIGEST_SHA256, buf, len, out_hex); +} + +static int build_image(image_t *img) +{ + memset(img, 0, sizeof(*img)); + + img->layer_bodies[0] = xstrdup_with_len("LAYER-ONE-bytes", + &img->layer_lens[0]); + img->layer_bodies[1] = xstrdup_with_len("LAYER-TWO-bytes-larger-payload", + &img->layer_lens[1]); + img->layer_bodies[2] = xstrdup_with_len("L3", &img->layer_lens[2]); + img->nlayers = 3; + for (size_t i = 0; i < img->nlayers; i++) + hash_bytes(img->layer_bodies[i], img->layer_lens[i], img->layer_hex[i]); + + char *cfg = vformat( + &img->config_len, + "{\"architecture\":\"arm64\",\"os\":\"linux\"," + "\"rootfs\":{\"type\":\"layers\"," + "\"diff_ids\":[\"sha256:%s\",\"sha256:%s\",\"sha256:%s\"]}}", + img->layer_hex[0], img->layer_hex[1], img->layer_hex[2]); + if (!cfg) + return -1; + img->config_json = cfg; + hash_bytes(cfg, img->config_len, img->config_hex); + + char *manifest = vformat( 
+ &img->manifest_len, + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"config\":{" + "\"mediaType\":\"application/vnd.oci.image.config.v1+json\"," + "\"digest\":\"sha256:%s\",\"size\":%zu}," + "\"layers\":[" + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"sha256:%s\",\"size\":%zu}," + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"sha256:%s\",\"size\":%zu}," + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"sha256:%s\",\"size\":%zu}]}", + img->config_hex, img->config_len, + img->layer_hex[0], img->layer_lens[0], + img->layer_hex[1], img->layer_lens[1], + img->layer_hex[2], img->layer_lens[2]); + if (!manifest) + return -1; + img->manifest_json = manifest; + hash_bytes(manifest, img->manifest_len, img->manifest_hex); + + char *index = vformat( + &img->index_len, + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.index.v1+json\"," + "\"manifests\":[{" + "\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"digest\":\"sha256:%s\",\"size\":%zu," + "\"platform\":{\"architecture\":\"arm64\",\"os\":\"linux\"," + "\"variant\":\"v8\"}}]}", + img->manifest_hex, img->manifest_len); + if (!index) + return -1; + img->index_json = index; + hash_bytes(index, img->index_len, img->index_hex); + return 0; +} + +static void free_image(image_t *img) +{ + free(img->config_json); + for (size_t i = 0; i < img->nlayers; i++) + free(img->layer_bodies[i]); + free(img->manifest_json); + free(img->index_json); + memset(img, 0, sizeof(*img)); +} + +/* ── Mock router ─────────────────────────────────────────────────── */ + +typedef struct { + char path[256]; + int status; + const char *content_type; + char docker_digest[80]; + const void *body; + size_t body_len; + bool has_docker_digest; +} route_t; + +#define ROUTES_MAX 16 + +typedef struct { + route_t routes[ROUTES_MAX]; + size_t nroutes; + /* When 
non-NULL, the router returns this body in place of routes[0]. Used + * to inject a digest-mismatch case where the registry serves bytes that do + * not hash to the Docker-Content-Digest header. + */ + const void *override_body; + size_t override_body_len; +} router_ctx_t; + +static void router_add(router_ctx_t *ctx, const char *path, int status, + const char *content_type, const char *docker_digest, + const void *body, size_t body_len) +{ + if (ctx->nroutes >= ROUTES_MAX) + return; + route_t *r = &ctx->routes[ctx->nroutes++]; + snprintf(r->path, sizeof(r->path), "%s", path); + r->status = status; + r->content_type = content_type; + r->body = body; + r->body_len = body_len; + if (docker_digest) { + snprintf(r->docker_digest, sizeof(r->docker_digest), "%s", + docker_digest); + r->has_docker_digest = true; + } else { + r->has_docker_digest = false; + } +} + +static void router_handler(oci_mock_server_t *s, oci_mock_io_t *io, + const oci_mock_request_t *req) +{ + router_ctx_t *ctx = oci_mock_handler_ctx(s); + for (size_t i = 0; i < ctx->nroutes; i++) { + const route_t *r = &ctx->routes[i]; + if (strcmp(req->path, r->path) != 0) + continue; + const void *body = r->body; + size_t body_len = r->body_len; + if (i == 0 && ctx->override_body) { + body = ctx->override_body; + body_len = ctx->override_body_len; + } + oci_mock_send_full(io, r->status, + r->status == 200 ? "OK" : "Error", + r->content_type, NULL, + r->has_docker_digest ? 
r->docker_digest : NULL, + body, body_len); + return; + } + oci_mock_send_full(io, 404, "Not Found", "text/plain", NULL, NULL, + "nope", 4); +} + +/* ── Fixture helpers ─────────────────────────────────────────────── */ + +typedef struct { + char ca_pem_path[256]; + char base_url[64]; + oci_mock_server_t *server; + image_t *img; + char *store_root; +} fixture_t; + +static void populate_routes_index(router_ctx_t *ctx, const image_t *img, + const char *index_dc_digest) +{ + char path[256]; + snprintf(path, sizeof(path), "/v2/library/alpine/manifests/3.20"); + router_add(ctx, path, 200, + "application/vnd.oci.image.index.v1+json", + index_dc_digest, img->index_json, img->index_len); + + snprintf(path, sizeof(path), "/v2/library/alpine/manifests/sha256:%s", + img->manifest_hex); + router_add(ctx, path, 200, + "application/vnd.oci.image.manifest.v1+json", NULL, + img->manifest_json, img->manifest_len); + + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->config_hex); + router_add(ctx, path, 200, "application/octet-stream", NULL, + img->config_json, img->config_len); + + for (size_t i = 0; i < img->nlayers; i++) { + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->layer_hex[i]); + router_add(ctx, path, 200, "application/octet-stream", NULL, + img->layer_bodies[i], img->layer_lens[i]); + } +} + +static bool blob_present(oci_store_t *store, const char *hex) +{ + return oci_blob_store_has(oci_store_blobs(store), OCI_DIGEST_SHA256, hex); +} + +static bool all_blobs_present(oci_store_t *store, const image_t *img) +{ + if (!blob_present(store, img->index_hex)) + return false; + if (!blob_present(store, img->manifest_hex)) + return false; + if (!blob_present(store, img->config_hex)) + return false; + for (size_t i = 0; i < img->nlayers; i++) + if (!blob_present(store, img->layer_hex[i])) + return false; + return true; +} + +/* ── Tests ───────────────────────────────────────────────────────── */ + +static void 
test_pull_index_arm64(fixture_t *fx) +{ + image_t *img = fx->img; + char dc[80]; + snprintf(dc, sizeof(dc), "sha256:%s", img->index_hex); + router_ctx_t ctx = {0}; + populate_routes_index(&ctx, img, dc); + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-idx", fx->store_root); + oci_store_t *store = oci_store_open(root); + if (!store) { + report_fail("pull: tag -> index -> arm64 manifest", "store open"); + return; + } + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + if (!f) { + report_fail("pull: tag -> index -> arm64 manifest", "fetcher new"); + oci_store_close(store); + return; + } + oci_ref_t ref = {0}; + const char *err = NULL; + if (oci_ref_parse("alpine:3.20", &ref, &err) < 0) { + report_fail("pull: tag -> index -> arm64 manifest", "ref parse"); + oci_fetcher_free(f); + oci_store_close(store); + return; + } + oci_pull_options_t popts = {.quiet = true}; + err = NULL; + int rc = oci_pull(f, store, &ref, &popts, &err); + if (rc != 0) { + report_fail("pull: tag -> index -> arm64 manifest", "rc=%d err=%s", rc, + err ? err : "(none)"); + goto cleanup; + } + if (!all_blobs_present(store, img)) { + report_fail("pull: tag -> index -> arm64 manifest", + "store missing one or more blobs"); + goto cleanup; + } + char *pin = NULL; + if (oci_store_get_ref(store, &ref, &pin, &err) < 0) { + report_fail("pull: tag -> index -> arm64 manifest", "no pin: %s", + err ? 
err : "?"); + goto cleanup; + } + char want_pin[80]; + snprintf(want_pin, sizeof(want_pin), "sha256:%s", img->index_hex); + if (strcmp(pin, want_pin) != 0) { + report_fail("pull: tag -> index -> arm64 manifest", + "pin=%s want=%s", pin, want_pin); + free(pin); + goto cleanup; + } + free(pin); + report_pass("pull: tag -> index -> arm64 manifest"); + +cleanup: + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +static void test_pull_direct_manifest(fixture_t *fx) +{ + image_t *img = fx->img; + /* Tag resolves directly to a manifest, no index. */ + router_ctx_t ctx = {0}; + char dc[80]; + snprintf(dc, sizeof(dc), "sha256:%s", img->manifest_hex); + char path[256]; + snprintf(path, sizeof(path), "/v2/library/alpine/manifests/3.20"); + router_add(&ctx, path, 200, + "application/vnd.oci.image.manifest.v1+json", dc, + img->manifest_json, img->manifest_len); + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->config_hex); + router_add(&ctx, path, 200, "application/octet-stream", NULL, + img->config_json, img->config_len); + for (size_t i = 0; i < img->nlayers; i++) { + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->layer_hex[i]); + router_add(&ctx, path, 200, "application/octet-stream", NULL, + img->layer_bodies[i], img->layer_lens[i]); + } + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-direct", fx->store_root); + oci_store_t *store = oci_store_open(root); + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + oci_ref_t ref = {0}; + const char *err = NULL; + oci_ref_parse("alpine:3.20", &ref, &err); + oci_pull_options_t popts = {.quiet = true}; + err = NULL; + int rc = oci_pull(f, store, &ref, &popts, &err); + if (rc != 0) { + report_fail("pull: tag -> direct manifest (no index)", "rc=%d err=%s", + rc, err ? 
err : "(none)"); + } else if (!blob_present(store, img->manifest_hex)) { + report_fail("pull: tag -> direct manifest (no index)", + "manifest blob missing"); + } else if (blob_present(store, img->index_hex)) { + /* No index was served; the index blob hex must not coincidentally land + * in the store. + */ + report_fail("pull: tag -> direct manifest (no index)", + "index blob unexpectedly present"); + } else { + char *pin = NULL; + char want[80]; + snprintf(want, sizeof(want), "sha256:%s", img->manifest_hex); + if (oci_store_get_ref(store, &ref, &pin, &err) < 0 || + strcmp(pin, want) != 0) { + report_fail("pull: tag -> direct manifest (no index)", + "pin mismatch"); + free(pin); + } else { + free(pin); + report_pass("pull: tag -> direct manifest (no index)"); + } + } + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +static void test_pull_digest_only(fixture_t *fx) +{ + image_t *img = fx->img; + router_ctx_t ctx = {0}; + char path[256]; + snprintf(path, sizeof(path), "/v2/library/alpine/manifests/sha256:%s", + img->manifest_hex); + router_add(&ctx, path, 200, + "application/vnd.oci.image.manifest.v1+json", NULL, + img->manifest_json, img->manifest_len); + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->config_hex); + router_add(&ctx, path, 200, "application/octet-stream", NULL, + img->config_json, img->config_len); + for (size_t i = 0; i < img->nlayers; i++) { + snprintf(path, sizeof(path), "/v2/library/alpine/blobs/sha256:%s", + img->layer_hex[i]); + router_add(&ctx, path, 200, "application/octet-stream", NULL, + img->layer_bodies[i], img->layer_lens[i]); + } + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-digest-only", fx->store_root); + oci_store_t *store = oci_store_open(root); + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + + char 
ref_str[256]; + snprintf(ref_str, sizeof(ref_str), "alpine@sha256:%s", img->manifest_hex); + oci_ref_t ref = {0}; + const char *err = NULL; + oci_ref_parse(ref_str, &ref, &err); + oci_pull_options_t popts = {.quiet = true}; + err = NULL; + int rc = oci_pull(f, store, &ref, &popts, &err); + if (rc != 0) { + report_fail("pull: digest-only ref", "rc=%d err=%s", rc, + err ? err : "(none)"); + } else if (!blob_present(store, img->manifest_hex)) { + report_fail("pull: digest-only ref", "manifest blob missing"); + } else { + char *pin = NULL; + errno = 0; + int gr = oci_store_get_ref(store, &ref, &pin, &err); + if (gr == 0) { + report_fail("pull: digest-only ref", + "unexpected pin written for digest-only ref"); + free(pin); + } else if (errno != EINVAL) { + report_fail("pull: digest-only ref", + "expected EINVAL on get_ref, got errno=%d", errno); + } else { + report_pass("pull: digest-only ref"); + } + } + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +static void test_pull_repull_caches(fixture_t *fx) +{ + image_t *img = fx->img; + char dc[80]; + snprintf(dc, sizeof(dc), "sha256:%s", img->index_hex); + router_ctx_t ctx = {0}; + populate_routes_index(&ctx, img, dc); + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-repull", fx->store_root); + oci_store_t *store = oci_store_open(root); + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + oci_ref_t ref = {0}; + const char *err = NULL; + oci_ref_parse("alpine:3.20", &ref, &err); + oci_pull_options_t popts = {.quiet = true}; + + /* First pull: should download index + manifest + config + 3 layers = 6 + * requests. The mock log clamps at OCI_MOCK_LOG_MAX = 16 so 6 fits. + */ + err = NULL; + if (oci_pull(f, store, &ref, &popts, &err) != 0) { + report_fail("pull: re-pull hits cache", "first pull failed: %s", + err ? 
err : "(none)"); + goto cleanup; + } + int first_count = oci_mock_request_count(fx->server); + + /* Reset request counter, re-pull. Layers + config should short-circuit + * via oci_blob_store_has. Manifest documents are still re-fetched (no + * manifest cache yet). Expect exactly 2 requests: index + sub-manifest. + */ + oci_mock_set_handler(fx->server, router_handler, &ctx); + err = NULL; + if (oci_pull(f, store, &ref, &popts, &err) != 0) { + report_fail("pull: re-pull hits cache", "second pull failed: %s", + err ? err : "(none)"); + goto cleanup; + } + int second_count = oci_mock_request_count(fx->server); + if (first_count != 6) { + report_fail("pull: re-pull hits cache", + "first pull made %d requests, expected 6", first_count); + goto cleanup; + } + if (second_count != 2) { + report_fail("pull: re-pull hits cache", + "second pull made %d requests, expected 2 (index + " + "manifest)", + second_count); + goto cleanup; + } + report_pass("pull: re-pull hits cache"); + +cleanup: + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +static void test_pull_docker_digest_mismatch(fixture_t *fx) +{ + image_t *img = fx->img; + /* The mock claims index_hex via Docker-Content-Digest but actually serves + * a different body. The pull must abort before any blob writes happen. 
+ */ + char dc[80]; + snprintf(dc, sizeof(dc), "sha256:%s", img->index_hex); + router_ctx_t ctx = {0}; + populate_routes_index(&ctx, img, dc); + static const char EVIL[] = "{\"schemaVersion\":2,\"evil\":true}"; + ctx.override_body = EVIL; + ctx.override_body_len = strlen(EVIL); + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-mismatch", fx->store_root); + oci_store_t *store = oci_store_open(root); + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + oci_ref_t ref = {0}; + const char *err = NULL; + oci_ref_parse("alpine:3.20", &ref, &err); + oci_pull_options_t popts = {.quiet = true}; + err = NULL; + errno = 0; + int rc = oci_pull(f, store, &ref, &popts, &err); + if (rc == 0) { + report_fail("pull: body digest != Docker-Content-Digest", + "rc=0 (expected -1)"); + } else if (errno != EPROTO) { + report_fail("pull: body digest != Docker-Content-Digest", + "errno=%d (expected EPROTO)", errno); + } else { + char *pin = NULL; + errno = 0; + if (oci_store_get_ref(store, &ref, &pin, &err) == 0) { + report_fail("pull: body digest != Docker-Content-Digest", + "pin unexpectedly written"); + free(pin); + } else if (errno != ENOENT) { + report_fail("pull: body digest != Docker-Content-Digest", + "get_ref errno=%d (expected ENOENT)", errno); + } else { + report_pass("pull: body digest != Docker-Content-Digest"); + } + } + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +static void test_pull_index_no_arm64(fixture_t *fx) +{ + /* An index that only lists amd64 has no usable sub-manifest. 
*/ + char index[512]; + int n = snprintf(index, sizeof(index), + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.index.v1+json\"," + "\"manifests\":[{" + "\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"digest\":\"sha256:0000000000000000000000000000" + "000000000000000000000000000000000000\"," + "\"size\":1," + "\"platform\":{\"architecture\":\"amd64\",\"os\":\"linux\"}}]}"); + char hex[OCI_DIGEST_HEX_MAX + 1]; + hash_bytes(index, (size_t) n, hex); + char dc[80]; + snprintf(dc, sizeof(dc), "sha256:%s", hex); + + router_ctx_t ctx = {0}; + router_add(&ctx, "/v2/library/alpine/manifests/3.20", 200, + "application/vnd.oci.image.index.v1+json", dc, index, + (size_t) n); + oci_mock_set_handler(fx->server, router_handler, &ctx); + + char root[1024]; + snprintf(root, sizeof(root), "%s/store-no-arm64", fx->store_root); + oci_store_t *store = oci_store_open(root); + oci_fetcher_options_t fopts = { + .base_url_override = fx->base_url, + .ca_file = fx->ca_pem_path, + }; + oci_fetcher_t *f = oci_fetcher_new(&fopts); + oci_ref_t ref = {0}; + const char *err = NULL; + oci_ref_parse("alpine:3.20", &ref, &err); + oci_pull_options_t popts = {.quiet = true}; + err = NULL; + errno = 0; + int rc = oci_pull(f, store, &ref, &popts, &err); + if (rc == 0) { + report_fail("pull: index without linux/arm64", "rc=0"); + } else if (errno != ENOENT) { + report_fail("pull: index without linux/arm64", + "errno=%d (expected ENOENT)", errno); + } else { + report_pass("pull: index without linux/arm64"); + } + oci_ref_free(&ref); + oci_fetcher_free(f); + oci_store_close(store); +} + +/* ── main ────────────────────────────────────────────────────────── */ + +int main(void) +{ + if (curl_global_sslset(CURLSSLBACKEND_OPENSSL, NULL, NULL) != + CURLSSLSET_OK) { + fprintf(stderr, + "libcurl OpenSSL backend not available; pull tests cannot run\n"); + return 1; + } + SSL_library_init(); + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + + char *scratch = 
oci_mock_make_scratch_root("elfuse-oci-pull"); + if (!scratch) { + fprintf(stderr, "mkdtemp failed: %s\n", strerror(errno)); + return 1; + } + oci_mock_server_t server; + if (oci_mock_server_start(&server, scratch) != 0) { + fprintf(stderr, "mock server start failed: %s\n", strerror(errno)); + oci_mock_wipe_dir(scratch); + free(scratch); + return 1; + } + char *base_url = oci_mock_make_base_url(server.port); + + image_t img; + if (build_image(&img) < 0) { + fprintf(stderr, "build_image failed\n"); + oci_mock_server_stop(&server); + free(base_url); + oci_mock_wipe_dir(scratch); + free(scratch); + return 1; + } + + fixture_t fx = {0}; + snprintf(fx.ca_pem_path, sizeof(fx.ca_pem_path), "%s", server.ca_pem_path); + snprintf(fx.base_url, sizeof(fx.base_url), "%s", base_url); + fx.server = &server; + fx.img = &img; + fx.store_root = scratch; + + printf("oci_pull (mock HTTPS @ %s, CA=%s)\n", base_url, server.ca_pem_path); + + test_pull_index_arm64(&fx); + test_pull_direct_manifest(&fx); + test_pull_digest_only(&fx); + test_pull_repull_caches(&fx); + test_pull_docker_digest_mismatch(&fx); + test_pull_index_no_arm64(&fx); + + free_image(&img); + free(base_url); + oci_mock_server_stop(&server); + oci_mock_wipe_dir(scratch); + free(scratch); + + printf("\nResults: %d/%d passed\n", g_passed, g_total); + return g_passed == g_total ? 
0 : 1; +} diff --git a/tests/test-oci-store.c b/tests/test-oci-store.c new file mode 100644 index 0000000..c8ff352 --- /dev/null +++ b/tests/test-oci-store.c @@ -0,0 +1,486 @@ +/* Local OCI image store unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Drives the pin / unpin / open invariants of src/oci/store.c against an + * mkdtemp scratch root: open layout creation, put + get round trip, miss + * surfaces ENOENT, digest-only refs are rejected (their digest is the pin), + * malformed digest input is rejected, deep repository slashes get mkdir -p, + * and the underlying blob store handle survives the wrapping store. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oci/blob-store.h" +#include "oci/digest.h" +#include "oci/ref.h" +#include "oci/store.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int total = 0; +static int passed = 0; + +static void report_pass(const char *name) +{ + total++; + passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *detail) +{ + total++; + printf(" " RED "FAIL" RESET " %s: %s\n", name, detail ? detail : ""); +} + +static int remove_entry(const char *path, const struct stat *st, int typeflag, + struct FTW *ftwbuf) +{ + (void) st; + (void) typeflag; + (void) ftwbuf; + return remove(path); +} + +static void wipe_dir(const char *root) +{ + (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS); +} + +static char *make_scratch_root(void) +{ + char tmpl[] = "/tmp/elfuse-test-oci-store-XXXXXX"; + char *p = mkdtemp(tmpl); + if (!p) + return NULL; + return strdup(p); +} + +/* The pin digest used across cases. SHA-256 of "abc"; the same value verified + * by test-oci-digest and test-oci-blob-store so the suites cross-reference. 
+ */ +static const char DIGEST_ABC[] = + "sha256:ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"; + +static bool parse_ref(const char *s, oci_ref_t *out) +{ + const char *err = NULL; + if (oci_ref_parse(s, out, &err) < 0) { + fprintf(stderr, "ref parse failed for %s: %s\n", s, err ? err : "?"); + return false; + } + return true; +} + +static void test_open_creates_layout(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-open", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("open_creates_layout", "oci_store_open returned NULL"); + return; + } + struct stat st; + char path[2048]; + snprintf(path, sizeof(path), "%s/blobs/sha256", root); + if (stat(path, &st) != 0 || !S_ISDIR(st.st_mode)) { + report_fail("open_creates_layout", "blobs/sha256 missing"); + oci_store_close(s); + return; + } + snprintf(path, sizeof(path), "%s/refs", root); + if (stat(path, &st) != 0 || !S_ISDIR(st.st_mode)) { + report_fail("open_creates_layout", "refs/ missing"); + oci_store_close(s); + return; + } + if (!oci_store_blobs(s)) { + report_fail("open_creates_layout", "blobs handle is NULL"); + oci_store_close(s); + return; + } + if (strcmp(oci_store_root(s), root) != 0) { + report_fail("open_creates_layout", "root string mismatch"); + oci_store_close(s); + return; + } + oci_store_close(s); + report_pass("open_creates_layout"); +} + +static void test_put_get_round_trip(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-roundtrip", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("put_get_round_trip", "open failed"); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("alpine:3.20", &ref)) { + report_fail("put_get_round_trip", "ref parse failed"); + oci_store_close(s); + return; + } + const char *err = NULL; + if (oci_store_put_ref(s, &ref, DIGEST_ABC, &err) < 0) { + report_fail("put_get_round_trip", err ? 
err : "put failed"); + goto cleanup; + } + char *got = NULL; + if (oci_store_get_ref(s, &ref, &got, &err) < 0) { + report_fail("put_get_round_trip", err ? err : "get failed"); + goto cleanup; + } + if (!got || strcmp(got, DIGEST_ABC) != 0) { + report_fail("put_get_round_trip", "digest mismatch"); + free(got); + goto cleanup; + } + free(got); + + /* Pin file lives at /refs/docker.io/library/alpine/3.20 */ + struct stat st; + char path[2048]; + snprintf(path, sizeof(path), "%s/refs/docker.io/library/alpine/3.20", root); + if (stat(path, &st) != 0 || !S_ISREG(st.st_mode)) { + report_fail("put_get_round_trip", "pin file not at expected path"); + goto cleanup; + } + report_pass("put_get_round_trip"); + +cleanup: + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_get_miss_enoent(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-miss", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("get_miss_enoent", "open failed"); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("ghcr.io/owner/img:tag", &ref)) { + report_fail("get_miss_enoent", "ref parse failed"); + oci_store_close(s); + return; + } + char *got = NULL; + errno = 0; + const char *err = NULL; + int rc = oci_store_get_ref(s, &ref, &got, &err); + if (rc == 0 || errno != ENOENT) { + report_fail("get_miss_enoent", "expected -1 with ENOENT"); + free(got); + } else if (got != NULL) { + report_fail("get_miss_enoent", "out_digest must be NULL on miss"); + } else { + report_pass("get_miss_enoent"); + } + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_digest_only_ref_rejected(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-digest-only", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("digest_only_ref_rejected", "open failed"); + return; + } + oci_ref_t ref = {0}; + const char *err = NULL; + if (oci_ref_parse( + "alpine@sha256:" + 
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", + &ref, &err) < 0) { + report_fail("digest_only_ref_rejected", err ? err : "ref parse failed"); + oci_store_close(s); + return; + } + if (ref.tag != NULL) { + report_fail("digest_only_ref_rejected", + "digest-only ref unexpectedly carries a tag"); + oci_ref_free(&ref); + oci_store_close(s); + return; + } + err = NULL; + errno = 0; + int rc = oci_store_put_ref(s, &ref, DIGEST_ABC, &err); + if (rc == 0 || errno != EINVAL) { + report_fail("digest_only_ref_rejected", "expected EINVAL on put"); + } else { + report_pass("digest_only_ref_rejected"); + } + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_malformed_digest_rejected(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-bad-digest", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("malformed_digest_rejected", "open failed"); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("alpine:3.20", &ref)) { + report_fail("malformed_digest_rejected", "ref parse failed"); + oci_store_close(s); + return; + } + const char *err = NULL; + errno = 0; + int rc = oci_store_put_ref(s, &ref, "not-a-digest", &err); + if (rc == 0 || errno != EINVAL) { + report_fail("malformed_digest_rejected", "expected EINVAL on put"); + } else { + report_pass("malformed_digest_rejected"); + } + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_deep_repository_mkdir(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-deep", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("deep_repository_mkdir", "open failed"); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("ghcr.io/owner/group/sub/img:v1.0", &ref)) { + report_fail("deep_repository_mkdir", "ref parse failed"); + oci_store_close(s); + return; + } + const char *err = NULL; + if (oci_store_put_ref(s, &ref, DIGEST_ABC, &err) < 0) { + report_fail("deep_repository_mkdir", err ? 
err : "put failed"); + goto cleanup; + } + struct stat st; + char path[2048]; + snprintf(path, sizeof(path), + "%s/refs/ghcr.io/owner/group/sub/img/v1.0", root); + if (stat(path, &st) != 0 || !S_ISREG(st.st_mode)) { + report_fail("deep_repository_mkdir", "deep pin not at expected path"); + goto cleanup; + } + report_pass("deep_repository_mkdir"); + +cleanup: + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_overwrite_pin(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-overwrite", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("overwrite_pin", "open failed"); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("alpine:3.20", &ref)) { + report_fail("overwrite_pin", "ref parse failed"); + oci_store_close(s); + return; + } + static const char SECOND[] = + "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852" + "b855"; + const char *err = NULL; + if (oci_store_put_ref(s, &ref, DIGEST_ABC, &err) < 0) { + report_fail("overwrite_pin", err ? err : "first put failed"); + goto cleanup; + } + if (oci_store_put_ref(s, &ref, SECOND, &err) < 0) { + report_fail("overwrite_pin", err ? err : "second put failed"); + goto cleanup; + } + char *got = NULL; + if (oci_store_get_ref(s, &ref, &got, &err) < 0) { + report_fail("overwrite_pin", err ? 
err : "get failed"); + goto cleanup; + } + if (!got || strcmp(got, SECOND) != 0) { + report_fail("overwrite_pin", "pin was not overwritten"); + free(got); + goto cleanup; + } + free(got); + report_pass("overwrite_pin"); + +cleanup: + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_pin_blob_share_root(const char *scratch) +{ + char root[1024]; + snprintf(root, sizeof(root), "%s/case-share", scratch); + oci_store_t *s = oci_store_open(root); + if (!s) { + report_fail("pin_blob_share_root", "open failed"); + return; + } + oci_blob_store_t *blobs = oci_store_blobs(s); + static const char ABC[] = "abc"; + static const char ABC_HEX[] = + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"; + if (oci_blob_store_put_bytes(blobs, OCI_DIGEST_SHA256, ABC_HEX, ABC, + sizeof(ABC) - 1) < 0) { + report_fail("pin_blob_share_root", "blob put failed"); + oci_store_close(s); + return; + } + oci_ref_t ref = {0}; + if (!parse_ref("alpine:3.20", &ref)) { + report_fail("pin_blob_share_root", "ref parse failed"); + oci_store_close(s); + return; + } + const char *err = NULL; + if (oci_store_put_ref(s, &ref, DIGEST_ABC, &err) < 0) { + report_fail("pin_blob_share_root", err ? err : "put_ref failed"); + goto cleanup; + } + if (!oci_blob_store_has(blobs, OCI_DIGEST_SHA256, ABC_HEX)) { + report_fail("pin_blob_share_root", "blob disappeared after pin"); + goto cleanup; + } + char *got = NULL; + if (oci_store_get_ref(s, &ref, &got, &err) < 0 || + strcmp(got, DIGEST_ABC) != 0) { + report_fail("pin_blob_share_root", "pin disappeared after blob"); + free(got); + goto cleanup; + } + free(got); + report_pass("pin_blob_share_root"); + +cleanup: + oci_ref_free(&ref); + oci_store_close(s); +} + +static void test_default_root_from_env(void) +{ + /* Save and clear environment so the default-root computation is fully + * deterministic within the test. 
*/ + char *saved_xdg = NULL; + const char *cur_xdg = getenv("XDG_DATA_HOME"); + if (cur_xdg) + saved_xdg = strdup(cur_xdg); + char *saved_home = NULL; + const char *cur_home = getenv("HOME"); + if (cur_home) + saved_home = strdup(cur_home); + + /* XDG path takes precedence. */ + setenv("XDG_DATA_HOME", "/tmp/elfuse-xdg-test", 1); + setenv("HOME", "/tmp/elfuse-home-test", 1); + char *r1 = oci_store_default_root(); + if (!r1 || strcmp(r1, "/tmp/elfuse-xdg-test/elfuse/store") != 0) { + report_fail("default_root_from_env", + "XDG_DATA_HOME path not respected"); + free(r1); + goto restore; + } + free(r1); + + /* Fall back to HOME when XDG is unset. */ + unsetenv("XDG_DATA_HOME"); + char *r2 = oci_store_default_root(); + if (!r2 || + strcmp(r2, + "/tmp/elfuse-home-test/Library/Application Support/elfuse/store") + != 0) { + report_fail("default_root_from_env", + "HOME fallback path not respected"); + free(r2); + goto restore; + } + free(r2); + + /* Neither set: errno=ENOENT. */ + unsetenv("HOME"); + errno = 0; + char *r3 = oci_store_default_root(); + if (r3 || errno != ENOENT) { + report_fail("default_root_from_env", + "expected NULL with ENOENT when no env present"); + free(r3); + goto restore; + } + report_pass("default_root_from_env"); + +restore: + if (saved_xdg) + setenv("XDG_DATA_HOME", saved_xdg, 1); + else + unsetenv("XDG_DATA_HOME"); + if (saved_home) + setenv("HOME", saved_home, 1); + else + unsetenv("HOME"); + free(saved_xdg); + free(saved_home); +} + +int main(void) +{ + printf("OCI store unit tests\n"); + char *scratch = make_scratch_root(); + if (!scratch) { + fprintf(stderr, "could not create scratch dir: %s\n", strerror(errno)); + return 1; + } + + test_open_creates_layout(scratch); + test_put_get_round_trip(scratch); + test_get_miss_enoent(scratch); + test_digest_only_ref_rejected(scratch); + test_malformed_digest_rejected(scratch); + test_deep_repository_mkdir(scratch); + test_overwrite_pin(scratch); + test_pin_blob_share_root(scratch); + 
test_default_root_from_env(); + + wipe_dir(scratch); + free(scratch); + + printf("\n%s/%d store tests passed\n", passed == total ? GREEN : RED, + total); + printf("%d/%d\n" RESET, passed, total); + return passed == total ? 0 : 1; +} From 0ec6b84e6a91141bedce2f94162f3cf096efada0 Mon Sep 17 00:00:00 2001 From: Max042004 Date: Fri, 15 May 2026 20:06:07 +0800 Subject: [PATCH 7/7] Add OCI offline manifest tree renderer for elfuse oci inspect Slice 5b of Phase 1 from issue #31. Closes out Phase 1 by giving elfuse oci inspect an actual function beyond the slice-1 canonical-ref print: it reads the local store the slice 5a pull pipeline populated and renders the manifest graph without touching the network. Phase 2 follows: sparse APFS volume bootstrap, layer unpack with whiteouts, clonefile copy-up. src/oci/inspect.{c,h} owns the offline renderer. oci_inspect resolves the manifest digest in three steps: 1. ref->digest when set (digest-pinned reference) 2. pin file <store>/refs/<registry>/<repository>/<tag> when ref->tag is set 3. Neither: print "(no local manifest; run 'elfuse oci pull' first)" on stdout and return 0. This preserves the slice-1 inspect smoke output shape for refs that were never pulled. The pinned digest goes through oci_digest_parse to reject corrupt pin files, then read_blob_file slurps <store>/blobs/<algo>/<hex> into a heap buffer. read_blob_file caps the read at 64 MiB (real manifests are well under 1 MiB; the cap prevents a corrupted store from forcing a pathological malloc) and reports errno=ENOENT when the blob file is absent. Classification between index and manifest is structural: the slice-3 parsers reject disjoint shapes (oci_index_parse requires a manifests array; oci_manifest_parse requires config + layers), so trying index first and falling back to manifest is unambiguous. Image config blobs never reach this path because pins point at manifest-shaped blobs. Index rendering prints a platforms table. 
Default mode shows only the picked linux/arm64 entry (tagged "[arm64]") and drills into the sub-manifest blob to print its config descriptor + layer table. The --all-platforms flag lists every platform entry and skips the drill; the flag answers "what does this image cover", not "what is inside the arm64 variant". Both decisions are documented inline at the oci_inspect_options_t definition. Failure mode for a partial store: index loads fine but the linux/arm64 sub-manifest blob is missing. The platform table still goes to stdout (the user sees what is available), a warning lands on stderr, and the call returns -1 with errno=ENOENT and err_msg = "indexed manifest blob missing from local store". Scripts key on the exit code; humans read the table. The errno is preserved across the cleanup goto in the same shape slice-5a oci_pull adopted. Digest formatting follows the slice-5a progress lines for visual consistency: full digests appear in the pinned: line and in index entry tagging (so users can copy / grep the exact value), and a 22-column short form ("sha256:" + 12 hex + "...") appears in the layer tables. short_digest takes a caller-supplied buffer so two short digests in one printf do not clobber a shared static. src/oci/cli.c grows parse_inspect_args + a cmd_inspect rewrite. The new flag set is --store DIR (override the platform default) and --all-platforms (the flag described above); the canonical-ref header print stays in cli.c so the slice-1 smoke output continues working when the store has no record. After the header, cmd_inspect opens the store and calls oci_inspect. rc 0 means success or pin miss; rc 1 means a real failure (malformed blob, blob missing, IO). tests/test-oci-inspect.c drives 6 cases against a pre-populated scratch store. The store is built directly with oci_blob_store_put_bytes + oci_store_put_ref, not through oci_pull, so the test stays independent of the slice-4 fetcher and the slice-5a pipeline. 
open_memstream captures stdout into a heap buffer and the assertions grep for distinctive substrings (digest hex prefixes, "[arm64]", section headers) so format tweaks do not cause spurious failures. The 6 cases are: a direct image manifest (config + 2 layers, asserts no [2] index appears so off-by-one shows up); an image index where default mode drills the arm64 sub-manifest and amd64 / s390x stay hidden; the same index with --all-platforms (all three platforms listed, drill section absent); a pin miss for an unknown tag (rc=0, informational line); a digest reference whose blob is absent (rc=-1, errno=ENOENT, "error: manifest blob ... not found"); and the index-ok sub-manifest-missing case (stdout still has the platform table, rc=-1, errno=ENOENT, err_msg identifies the missing inner blob). The last case dup2's stderr to /dev/null around the run so the warning line does not pollute the test driver output. Makefile adds oci/inspect.c to SRCS. mk/config.mk registers tests/test-oci-inspect.c in NATIVE_TESTS so the cross-compile pattern rule skips it. The new link rule pulls in inspect.o, store.o, blob-store.o, digest.o, manifest.o, media-type.o, ref.o, and cJSON; no libcurl, no openssl. mk/tests.mk gains a test-oci-inspect target and runs it as a make-check stage after OCI-pull. make check stays fully green: 78 unit tests; busybox 81/0/3; proctitle low-stack; procfs-exec; timeout-disable; OCI-ref 34/34; OCI-digest 25/25; OCI-blob-store 14/14; OCI-manifest 76/76; OCI-fetch 15/15; OCI-store 9/9; OCI-pull 6/6; OCI-inspect 6/6. make test-oci-fetch-online (opt-in) still passes. elfuse oci inspect now has a real second pane: the slice-1 canonical header followed by either the rendered manifest tree or a clear "never pulled" notice. prune and list still return rc=2. 
--- Makefile | 10 +- mk/config.mk | 3 +- mk/tests.mk | 7 + src/oci/cli.c | 121 +++++++- src/oci/inspect.c | 394 ++++++++++++++++++++++++++ src/oci/inspect.h | 61 ++++ tests/test-oci-inspect.c | 593 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 1174 insertions(+), 15 deletions(-) create mode 100644 src/oci/inspect.c create mode 100644 src/oci/inspect.h create mode 100644 tests/test-oci-inspect.c diff --git a/Makefile b/Makefile index 3b303df..cdd1dd3 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,8 @@ SRCS := \ oci/manifest.c \ oci/fetch.c \ oci/store.c \ - oci/pull.c + oci/pull.c \ + oci/inspect.c SRCS := $(addprefix src/,$(SRCS)) OBJS := $(patsubst src/%.c,$(BUILD_DIR)/%.o,$(SRCS)) @@ -199,6 +200,13 @@ $(BUILD_DIR)/test-oci-pull: $(BUILD_DIR)/test-oci-pull.o $(BUILD_DIR)/lib/oci-mo @echo " LD $@" $(Q)$(CC) $(CFLAGS) -o $@ $^ -lcurl -lpthread $(OPENSSL_LDFLAGS) +## Build the OCI inspect renderer unit test (native macOS, no HVF). Pure +## offline: no fetcher, no mock server, no libcurl. Pre-populates the store +## via oci_blob_store_put_bytes + oci_store_put_ref. +$(BUILD_DIR)/test-oci-inspect: $(BUILD_DIR)/test-oci-inspect.o $(BUILD_DIR)/oci/inspect.o $(BUILD_DIR)/oci/store.o $(BUILD_DIR)/oci/blob-store.o $(BUILD_DIR)/oci/digest.o $(BUILD_DIR)/oci/manifest.o $(BUILD_DIR)/oci/media-type.o $(BUILD_DIR)/oci/ref.o $(CJSON_OBJ) | $(BUILD_DIR) + @echo " LD $@" + $(Q)$(CC) $(CFLAGS) -o $@ $^ + # ── Guest test binaries (cross-compiled, aarch64-linux) ────────── # Only used when GUEST_TEST_BINARIES is not set. 
diff --git a/mk/config.mk b/mk/config.mk index 02ee60f..72e91ca 100644 --- a/mk/config.mk +++ b/mk/config.mk @@ -18,7 +18,8 @@ endif NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c tests/test-oci-ref.c \ tests/test-oci-digest.c tests/test-oci-blob-store.c \ tests/test-oci-manifest.c tests/test-oci-fetch.c \ - tests/test-oci-store.c tests/test-oci-pull.c + tests/test-oci-store.c tests/test-oci-pull.c \ + tests/test-oci-inspect.c SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000 diff --git a/mk/tests.mk b/mk/tests.mk index 2a162cf..b0f73de 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -8,6 +8,7 @@ test-full test-multi-vcpu test-rwx \ test-oci-ref test-oci-digest test-oci-blob-store test-oci-manifest \ test-oci-fetch test-oci-fetch-online test-oci-store test-oci-pull \ + test-oci-inspect \ test-sysroot-rename \ test-case-collision test-case-collision-fallback test-sysroot-create-paths \ test-proctitle-low-stack \ @@ -48,6 +49,8 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage @$(MAKE) --no-print-directory test-oci-store @printf "\n$(BLUE)━━━ OCI pull pipeline unit tests ━━━$(RESET)\n" @$(MAKE) --no-print-directory test-oci-pull + @printf "\n$(BLUE)━━━ OCI inspect renderer unit tests ━━━$(RESET)\n" + @$(MAKE) --no-print-directory test-oci-inspect ## Run the OCI image reference parser unit tests (native, no HVF) test-oci-ref: $(BUILD_DIR)/test-oci-ref @@ -84,6 +87,10 @@ test-oci-store: $(BUILD_DIR)/test-oci-store test-oci-pull: $(BUILD_DIR)/test-oci-pull @$(BUILD_DIR)/test-oci-pull +## Run the OCI inspect renderer unit tests (native, no HVF, no network) +test-oci-inspect: $(BUILD_DIR)/test-oci-inspect + @$(BUILD_DIR)/test-oci-inspect + test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename @tmpdir=$$(mktemp -d); \ trap 'rm -rf "$$tmpdir"; rm -f /tmp/elfuse-sysroot-rename-dst.txt' EXIT; \ diff --git a/src/oci/cli.c b/src/oci/cli.c index 
3b9fc59..db58192 100644 --- a/src/oci/cli.c +++ b/src/oci/cli.c @@ -5,9 +5,10 @@ * * Slice 5a turns pull into a real subcommand: argument parsing for --store, * -u USER[:PASS], --insecure-ca PEM, --insecure, -q, plus the actual oci_pull - * invocation against a freshly opened store and fetcher. inspect, prune, and - * list still rely on inspect's slice-1 canonical-ref print or return rc=2 - * "not implemented yet" (inspect's offline rendering lands in slice 5b). + * invocation against a freshly opened store and fetcher. Slice 5b extends + * inspect with --store and --all-platforms and an offline manifest tree + * renderer (src/oci/inspect.c). prune and list still return rc=2 "not + * implemented yet". */ #include "cli.h" @@ -18,6 +19,7 @@ #include #include "fetch.h" +#include "inspect.h" #include "pull.h" #include "ref.h" #include "store.h" @@ -28,10 +30,10 @@ static int print_usage(FILE *out) "usage: elfuse oci [args]\n" "\n" "Subcommands:\n" - " pull [OPTIONS] Download an image into the local store\n" - " inspect Show the canonical reference and parsed fields\n" - " prune Remove unreferenced blobs from the local store\n" - " list List images in the local store\n" + " pull [OPTIONS] Download an image into the local store\n" + " inspect [OPTIONS] Show the canonical reference and parsed fields\n" + " prune Remove unreferenced blobs from the local store\n" + " list List images in the local store\n" "\n" "Pull options:\n" " --store DIR Override the local store root\n" @@ -41,6 +43,11 @@ static int print_usage(FILE *out) " --insecure Skip TLS verify (loopback registries only)\n" " -q, --quiet Suppress per-blob progress output\n" "\n" + "Inspect options:\n" + " --store DIR Override the local store root\n" + " --all-platforms List every platform entry of an image index\n" + " instead of drilling into linux/arm64\n" + "\n" "Refs follow the docker/containerd grammar:\n" " alpine, alpine:3.20, user/repo, ghcr.io/owner/img:tag,\n" " repo@sha256:, repo:tag@sha256:\n", @@ 
-48,15 +55,67 @@ static int print_usage(FILE *out) return out == stderr ? 2 : 0; } +/* Argument parser state for `oci inspect`. Mirrors pull_args_t in shape so a + * future cleanup could share the flag-loop, but the option set is disjoint + * enough that today the two parsers live side by side. + */ +typedef struct { + const char *store_root; + bool show_all_platforms; + const char *ref_str; +} inspect_args_t; + +static int parse_inspect_args(int argc, char **argv, inspect_args_t *out) +{ + int i = 1; + while (i < argc) { + const char *a = argv[i]; + if (a[0] != '-') + break; + if (!strcmp(a, "--")) { + i++; + break; + } + if (!strcmp(a, "-h") || !strcmp(a, "--help")) { + return 1; + } else if (!strcmp(a, "--all-platforms")) { + out->show_all_platforms = true; + } else if (!strcmp(a, "--store")) { + if (++i >= argc) { + fputs("error: --store needs an argument\n", stderr); + return -1; + } + out->store_root = argv[i]; + } else { + fprintf(stderr, "error: unknown inspect option: %s\n", a); + return -1; + } + i++; + } + if (i >= argc) { + fputs("error: inspect needs a reference argument\n", stderr); + return -1; + } + if (i != argc - 1) { + fputs("error: extra arguments after inspect reference\n", stderr); + return -1; + } + out->ref_str = argv[i]; + return 0; +} + static int cmd_inspect(int argc, char **argv) { - if (argc != 2) { - fputs("error: inspect takes exactly one reference argument\n", stderr); + inspect_args_t args = {0}; + int prc = parse_inspect_args(argc, argv, &args); + if (prc == 1) + return print_usage(stdout); + if (prc < 0) return 2; - } - oci_ref_t ref; + + oci_ref_t ref = {0}; const char *err = NULL; - if (oci_ref_parse(argv[1], &ref, &err) < 0) { + if (oci_ref_parse(args.ref_str, &ref, &err) < 0) { fprintf(stderr, "error: %s\n", err ? err : "invalid reference"); return 1; } @@ -72,8 +131,44 @@ static int cmd_inspect(int argc, char **argv) printf("tag: %s\n", ref.tag ? ref.tag : "(none)"); printf("digest: %s\n", ref.digest ? 
ref.digest : "(none)"); free(canonical); + + /* Resolve store root: --store override or platform default. */ + char *default_root = NULL; + const char *store_root = args.store_root; + if (!store_root) { + default_root = oci_store_default_root(); + if (!default_root) { + fprintf(stderr, + "error: could not determine default store root " + "(HOME not set?)\n"); + oci_ref_free(&ref); + return 1; + } + store_root = default_root; + } + + oci_store_t *store = oci_store_open(store_root); + if (!store) { + fprintf(stderr, "error: could not open store at %s: %s\n", store_root, + strerror(errno)); + oci_ref_free(&ref); + free(default_root); + return 1; + } + + oci_inspect_options_t opts = { + .out = stdout, + .show_all_platforms = args.show_all_platforms, + }; + err = NULL; + int rc = oci_inspect(store, &ref, &opts, &err); + if (rc < 0 && err) + fprintf(stderr, "error: %s\n", err); + + oci_store_close(store); oci_ref_free(&ref); - return 0; + free(default_root); + return rc < 0 ? 1 : 0; } /* Argument parser state for `oci pull`. Defaults are populated by the caller, diff --git a/src/oci/inspect.c b/src/oci/inspect.c new file mode 100644 index 0000000..1712ca3 --- /dev/null +++ b/src/oci/inspect.c @@ -0,0 +1,394 @@ +/* Offline manifest tree renderer for elfuse oci inspect + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Reads the blob the local pin points at, classifies it as an image index or + * image manifest, and prints a tree. No network, no fetcher. The manifest + * model from slice 3 enforces every digest is lowercase and every descriptor + * size is non-negative, so the renderer can trust its inputs once the parse + * returns 0. + * + * Detection between index and manifest is structural: oci_index_parse refuses + * a body that has no "manifests" array, oci_manifest_parse refuses a body + * that has no "config" + "layers" pair. The two parsers therefore reject + * disjoint shapes, and trying one then the other is unambiguous. 
Image + * configs never reach this code path because pins point at manifest-shaped + * blobs (slice 5a stores the manifest body it received from the registry). + */ + +#include "inspect.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blob-store.h" +#include "digest.h" +#include "manifest.h" +#include "media-type.h" + +/* Upper bound on a manifest/index body. Real manifests are well under 1 MiB; + * a 64 MiB cap is generous and prevents a corrupted store from forcing a + * pathological malloc. + */ +#define INSPECT_BODY_MAX ((size_t) 64 * 1024 * 1024) + +/* Render a digest in compact form: + * + * - short_digest("sha256:abcdef0123456789...") + * -> "sha256:abcdef012345..." (first 19 chars + "...") + * + * Matches the slice 5a pull progress line so the two surfaces stay visually + * consistent. The caller-supplied buffer keeps the function reentrant; using + * one static buffer would clobber on the second %s in a single printf. + */ +static void short_digest(const char *full, char out[24]) +{ + if (!full) { + snprintf(out, 24, "(null)"); + return; + } + size_t len = strlen(full); + if (len <= 22) { + snprintf(out, 24, "%s", full); + return; + } + snprintf(out, 24, "%.19s...", full); +} + +/* Compose a "linux/arm64/v8" string from a parsed platform descriptor. The + * variant suffix is omitted when the variant field is empty so a platform + * with no variant prints as "linux/amd64" rather than "linux/amd64/". + */ +static void render_platform(const oci_platform_t *p, char out[64]) +{ + const char *os = p->os && *p->os ? p->os : "?"; + const char *arch = p->architecture && *p->architecture ? p->architecture + : "?"; + if (p->variant && *p->variant) { + snprintf(out, 64, "%s/%s/%s", os, arch, p->variant); + } else { + snprintf(out, 64, "%s/%s", os, arch); + } +} + +/* Open /blobs// and slurp the contents into a fresh + * heap buffer.
NUL-terminates the buffer so the slice 3 parsers (which accept + * exact-length bytes) can also be fed as C strings if a caller wants. On + * miss returns -1 with errno=ENOENT; on read failure returns -1 with errno + * preserved or set to EIO. + */ +static int read_blob_file(oci_blob_store_t *blobs, oci_digest_algo_t algo, + const char *hex, char **out_body, size_t *out_len) +{ + char path[4096]; + int n = oci_blob_store_path(blobs, algo, hex, path, sizeof(path)); + if (n < 0 || (size_t) n >= sizeof(path)) { + errno = ENAMETOOLONG; + return -1; + } + int fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + struct stat st; + if (fstat(fd, &st) < 0) { + int saved = errno; + close(fd); + errno = saved; + return -1; + } + if (st.st_size < 0 || (uintmax_t) st.st_size > INSPECT_BODY_MAX) { + close(fd); + errno = EFBIG; + return -1; + } + size_t want = (size_t) st.st_size; + char *buf = malloc(want + 1); + if (!buf) { + close(fd); + errno = ENOMEM; + return -1; + } + size_t off = 0; + while (off < want) { + ssize_t r = read(fd, buf + off, want - off); + if (r < 0) { + int saved = errno; + free(buf); + close(fd); + errno = saved; + return -1; + } + if (r == 0) + break; + off += (size_t) r; + } + close(fd); + if (off != want) { + free(buf); + errno = EIO; + return -1; + } + buf[want] = '\0'; + *out_body = buf; + *out_len = want; + return 0; +} + +/* Print the config + layer table for a parsed manifest. When manifest_digest + * is non-NULL, a "manifest: ()" header line goes + * first; the direct-manifest path passes NULL so it does not duplicate the + * already-printed pin line. + */ +static void render_manifest(FILE *out, const oci_manifest_t *mf, + const char *manifest_digest) +{ + if (manifest_digest) { + const char *mt = oci_media_type_name(mf->media_type); + fprintf(out, "manifest: %s (%s)\n", manifest_digest, + mt ? 
mt : "unknown"); + } + char buf[24]; + short_digest(mf->config.digest_str, buf); + const char *config_mt = oci_media_type_name(mf->config.media_type); + fprintf(out, " config: %-22s %12" PRId64 "B %s\n", buf, + mf->config.size, config_mt ? config_mt : "unknown"); + fprintf(out, " layers:\n"); + for (size_t i = 0; i < mf->nlayers; i++) { + const oci_descriptor_t *l = &mf->layers[i]; + short_digest(l->digest_str, buf); + const char *lmt = oci_media_type_name(l->media_type); + fprintf(out, " [%zu] %-22s %12" PRId64 "B %s\n", i, buf, + l->size, lmt ? lmt : "unknown"); + } +} + +/* Render the index entry table. Default mode prints only the picked + * linux/arm64 entry (with a "[arm64]" tag); --all-platforms prints every + * entry, tagging the picked one so users still see which one elfuse will + * resolve. + */ +static void render_index_platforms(FILE *out, const oci_index_t *idx, + const oci_index_entry_t *picked, + bool show_all) +{ + fprintf(out, "platforms:\n"); + for (size_t i = 0; i < idx->nentries; i++) { + const oci_index_entry_t *e = &idx->entries[i]; + bool is_picked = (e == picked); + if (!show_all && !is_picked) + continue; + char digest_buf[24]; + short_digest(e->desc.digest_str, digest_buf); + char platform_buf[64]; + render_platform(&e->platform, platform_buf); + const char *mt = oci_media_type_name(e->desc.media_type); + fprintf(out, " %-9s %-22s %-22s %12" PRId64 "B %s\n", + is_picked ? "[arm64]" : "", platform_buf, digest_buf, + e->desc.size, mt ? mt : "unknown"); + } + fprintf(out, "\n"); +} + +int oci_inspect(oci_store_t *store, const oci_ref_t *ref, + const oci_inspect_options_t *opts, const char **err_msg) +{ + if (!store || !ref || !ref->registry || !ref->repository) { + if (err_msg) + *err_msg = "invalid arguments"; + errno = EINVAL; + return -1; + } + FILE *out = opts && opts->out ? opts->out : stdout; + bool show_all = opts && opts->show_all_platforms; + + /* 1. Resolve manifest digest from ref. 
*/ + char *pinned = NULL; + bool from_pin = false; + if (ref->digest) { + pinned = strdup(ref->digest); + if (!pinned) { + errno = ENOMEM; + if (err_msg) + *err_msg = "out of memory"; + return -1; + } + } else if (ref->tag) { + const char *get_err = NULL; + int gr = oci_store_get_ref(store, ref, &pinned, &get_err); + if (gr < 0) { + if (errno == ENOENT) { + fprintf(out, + "pinned: (no local manifest; run 'elfuse oci " + "pull' first)\n"); + return 0; + } + if (err_msg) + *err_msg = get_err ? get_err : "failed to read pin"; + return -1; + } + from_pin = true; + } else { + /* The slice 1 ref parser defaults tag to "latest" when no digest is + * given, so this branch is structurally unreachable through the CLI. + * Guard it anyway so a hand-constructed ref does not segfault. + */ + if (err_msg) + *err_msg = "ref has neither tag nor digest"; + errno = EINVAL; + return -1; + } + + /* 2. Print the pin line. The digest reference annotation tells the user + * this came from ref->digest rather than the local pin file. + */ + if (from_pin) { + fprintf(out, "pinned: %s\n", pinned); + } else { + fprintf(out, "pinned: %s (digest reference)\n", pinned); + } + + /* 3. Validate the digest and read the blob. */ + oci_digest_algo_t algo; + char hex[OCI_DIGEST_HEX_MAX + 1]; + if (!oci_digest_parse(pinned, &algo, hex)) { + if (err_msg) + *err_msg = "pinned digest is malformed"; + errno = EINVAL; + free(pinned); + return -1; + } + + char *body = NULL; + size_t body_len = 0; + if (read_blob_file(oci_store_blobs(store), algo, hex, &body, &body_len) < + 0) { + if (errno == ENOENT) { + fprintf(out, + "error: manifest blob %s not found in local store\n", + pinned); + if (err_msg) + *err_msg = "manifest blob missing from local store"; + free(pinned); + errno = ENOENT; + return -1; + } + int saved = errno; + if (err_msg) + *err_msg = "failed to read manifest blob"; + free(pinned); + errno = saved; + return -1; + } + + /* 4. Classify: try index first, then manifest. 
The two parsers reject + * disjoint shapes (one requires "manifests", the other requires "config" + * + "layers"), so a successful parse is unambiguous. + */ + oci_index_t idx = {0}; + oci_manifest_t mf = {0}; + bool is_index = false; + bool is_manifest = false; + if (oci_index_parse(body, body_len, &idx, NULL) == 0) { + is_index = true; + } else if (oci_manifest_parse(body, body_len, &mf, NULL) == 0) { + is_manifest = true; + } else { + if (err_msg) + *err_msg = "manifest blob is neither a valid index nor manifest"; + errno = EPROTO; + free(body); + free(pinned); + return -1; + } + + /* 5. Render. */ + int rc = 0; + if (is_index) { + const char *imt = oci_media_type_name(idx.media_type); + fprintf(out, "type: image index (%s)\n\n", + imt ? imt : "unknown"); + + const oci_index_entry_t *picked = oci_index_pick_linux_arm64(&idx); + render_index_platforms(out, &idx, picked, show_all); + + /* Default mode drills into the picked linux/arm64 sub-manifest. The + * --all-platforms request is "show me the cover", not "drill"; skip + * the sub-manifest read entirely. 
+ */ + if (!show_all) { + if (!picked) { + fprintf(out, "error: index has no linux/arm64 entry\n"); + if (err_msg) + *err_msg = "index has no linux/arm64 entry"; + errno = ENOENT; + rc = -1; + } else { + char *sub_body = NULL; + size_t sub_len = 0; + if (read_blob_file(oci_store_blobs(store), picked->desc.algo, + picked->desc.hex, &sub_body, &sub_len) < + 0) { + if (errno == ENOENT) { + fprintf(stderr, + "warning: linux/arm64 manifest blob %s not " + "in local store\n", + picked->desc.digest_str); + if (err_msg) + *err_msg = + "indexed manifest blob missing from local " + "store"; + errno = ENOENT; + rc = -1; + } else { + int saved = errno; + if (err_msg) + *err_msg = "failed to read sub-manifest blob"; + errno = saved; + rc = -1; + } + } else { + oci_manifest_t sub_mf = {0}; + if (oci_manifest_parse(sub_body, sub_len, &sub_mf, NULL) == + 0) { + render_manifest(out, &sub_mf, picked->desc.digest_str); + oci_manifest_free(&sub_mf); + } else { + fprintf(out, + "error: sub-manifest blob %s is malformed\n", + picked->desc.digest_str); + if (err_msg) + *err_msg = "sub-manifest is malformed"; + errno = EPROTO; + rc = -1; + } + free(sub_body); + } + } + } + } else if (is_manifest) { + const char *mmt = oci_media_type_name(mf.media_type); + fprintf(out, "type: image manifest (%s)\n\n", + mmt ? mmt : "unknown"); + render_manifest(out, &mf, NULL); + } + + /* errno preserved across cleanup, like slice 5a oci_pull. 
*/ + int saved_errno = errno; + oci_index_free(&idx); + oci_manifest_free(&mf); + free(body); + free(pinned); + if (rc != 0) + errno = saved_errno; + return rc; +} diff --git a/src/oci/inspect.h b/src/oci/inspect.h new file mode 100644 index 0000000..6508e18 --- /dev/null +++ b/src/oci/inspect.h @@ -0,0 +1,61 @@ +/* Offline manifest tree renderer for elfuse oci inspect + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Reads the local store the slice 5a pull pipeline populated and prints the + * resolved manifest graph without touching the network. The function does not + * print the canonical reference header (registry / repository / tag / digest); + * that piece is owned by src/oci/cli.c so the slice-1 inspect smoke output + * stays exactly the same when the store has no record for a ref. + * + * Manifest digest resolution order: + * 1. ref->digest, when set (digest-pinned reference) + * 2. Pin file /refs/// + * 3. Neither: print "(no local manifest...)" and return 0 (informational) + * + * Render policy: + * - The blob is classified structurally: it is tried as an index first, + * then as a manifest. A body that parses as neither aborts with EPROTO. + * - For an image index: prints a platform table. Default mode shows only + * the linux/arm64 entry and then drills into its sub-manifest to print + * the config descriptor and layer table. --all-platforms (opts-> + * show_all_platforms) lists every entry and skips the drill -- it is + * "what platforms does this image cover", not "what is inside the arm64 + * variant". + * - For an image manifest: prints config + layers directly. + * + * Failure mode for partial stores: when the index loads but the linux/arm64 + * sub-manifest blob is missing from the store, the platform table is still + * printed (stdout), a warning lands on stderr, and the call returns -1 with + * errno=ENOENT.
That preserves the informational view while letting scripts + * detect the inconsistency through the exit code. + */ + +#pragma once + +#include +#include + +#include "ref.h" +#include "store.h" + +typedef struct { + /* Destination for the rendered tree. NULL defaults to stdout. */ + FILE *out; + /* List every platform entry of an image index instead of only the picked + * linux/arm64 entry. In this mode oci_inspect does not drill into any + * sub-manifest. + */ + bool show_all_platforms; +} oci_inspect_options_t; + +/* Render the manifest tree the store holds for ref. opts may be NULL for the + * defaults (out=stdout, show_all_platforms=false). Returns 0 on success or + * pin miss; -1 with errno preserved and *err_msg (when non-NULL) pointing at + * a static description on failure (malformed blob, blob missing, IO error). + */ +int oci_inspect(oci_store_t *store, + const oci_ref_t *ref, + const oci_inspect_options_t *opts, + const char **err_msg); diff --git a/tests/test-oci-inspect.c b/tests/test-oci-inspect.c new file mode 100644 index 0000000..f5d6310 --- /dev/null +++ b/tests/test-oci-inspect.c @@ -0,0 +1,593 @@ +/* elfuse oci inspect renderer unit tests + * + * Copyright 2026 elfuse contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Drives oci_inspect against a pre-populated scratch store. The store is + * built directly via oci_blob_store_put_bytes + oci_store_put_ref so the + * cases stay independent of the slice 4 fetcher and the slice 5a pull + * pipeline. open_memstream captures stdout and the assertions grep for + * distinctive substrings (digest prefixes, section headers, "[arm64]" tag) + * so output format tweaks do not cause spurious failures unless the + * semantically-relevant fields disappear. + * + * Cases: + * 1. Direct manifest pull + pin: config + layers section, layer count + * 2. Index + arm64 picked: platform table with [arm64] tag, drill prints + * manifest layers + * 3. 
Index + --all-platforms: every platform listed, no drill section + * 4. Pin miss: "(no local manifest...)" on stdout, rc=0 + * 5. ref with digest, blob missing: "error: manifest blob ... not found", + * rc=-1 errno=ENOENT + * 6. Index ok, sub-manifest blob missing: stdout contains the platform + * table, rc=-1 errno=ENOENT, err_msg identifies the missing blob + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oci/blob-store.h" +#include "oci/digest.h" +#include "oci/inspect.h" +#include "oci/ref.h" +#include "oci/store.h" + +#define GREEN "\033[0;32m" +#define RED "\033[0;31m" +#define RESET "\033[0m" + +static int g_total = 0; +static int g_passed = 0; + +static void report_pass(const char *name) +{ + g_total++; + g_passed++; + printf(" " GREEN "OK" RESET " %s\n", name); +} + +static void report_fail(const char *name, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static void report_fail(const char *name, const char *fmt, ...) +{ + g_total++; + printf(" " RED "FAIL" RESET " %s", name); + if (fmt && *fmt) { + printf(": "); + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + } + printf("\n"); +} + +static int remove_entry(const char *path, const struct stat *st, int typeflag, + struct FTW *ftwbuf) +{ + (void) st; + (void) typeflag; + (void) ftwbuf; + return remove(path); +} + +static void wipe_dir(const char *root) +{ + (void) nftw(root, remove_entry, 8, FTW_DEPTH | FTW_PHYS); +} + +static char *make_scratch_root(void) +{ + char tmpl[] = "/tmp/elfuse-test-oci-inspect-XXXXXX"; + if (!mkdtemp(tmpl)) + return NULL; + return strdup(tmpl); +} + +/* Drop the manifest body bytes that the slice 5a pull pipeline would + * normally have written. Hashes them with SHA-256 so the digest stays + * consistent with the bytes the store will serve back. 
+ */ +static char *put_manifest_blob(oci_blob_store_t *blobs, const char *body, + size_t body_len, char *out_digest_str, + size_t out_cap, char *out_hex) +{ + if (oci_digest_bytes(OCI_DIGEST_SHA256, body, body_len, out_hex) == 0) { + fprintf(stderr, "hash failed\n"); + return NULL; + } + snprintf(out_digest_str, out_cap, "sha256:%s", out_hex); + if (oci_blob_store_put_bytes(blobs, OCI_DIGEST_SHA256, out_hex, body, + body_len) < 0) { + fprintf(stderr, "blob put failed: %s\n", strerror(errno)); + return NULL; + } + return out_hex; +} + +static char *vformat(size_t *out_len, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static char *vformat(size_t *out_len, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + if (n < 0) + return NULL; + char *r = malloc((size_t) n + 1); + if (!r) + return NULL; + va_start(ap, fmt); + vsnprintf(r, (size_t) n + 1, fmt, ap); + va_end(ap); + *out_len = (size_t) n; + return r; +} + +/* Run oci_inspect and return the captured stdout bytes via result->out + * (caller frees) plus the rc / saved errno / err_msg. + */ +typedef struct { + int rc; + int saved_errno; + const char *err_msg; + char *out; + size_t out_len; +} inspect_result_t; + +static void run_inspect(oci_store_t *store, const oci_ref_t *ref, + const oci_inspect_options_t *base_opts, + inspect_result_t *result) +{ + memset(result, 0, sizeof(*result)); + char *buf = NULL; + size_t cap = 0; + FILE *fp = open_memstream(&buf, &cap); + if (!fp) { + result->rc = -1; + result->saved_errno = errno; + return; + } + oci_inspect_options_t opts = base_opts ?
*base_opts + : (oci_inspect_options_t){0}; + opts.out = fp; + const char *err = NULL; + errno = 0; + result->rc = oci_inspect(store, ref, &opts, &err); + result->saved_errno = errno; + result->err_msg = err; + fflush(fp); + fclose(fp); + result->out = buf; + result->out_len = cap; +} + +static bool contains(const char *haystack, const char *needle) +{ + return haystack && needle && strstr(haystack, needle) != NULL; +} + +/* ── Case 1: direct manifest ─────────────────────────────────────── */ + +static void case_direct_manifest(const char *scratch) +{ + const char *name = "inspect: direct manifest renders config + layers"; + char root[1024]; + snprintf(root, sizeof(root), "%s/case-direct", scratch); + oci_store_t *store = oci_store_open(root); + oci_blob_store_t *blobs = oci_store_blobs(store); + + static const char LAYER1[] = "layer-one-bytes"; + static const char LAYER2[] = "layer-two-bytes-longer"; + char l1_hex[OCI_DIGEST_HEX_MAX + 1]; + char l2_hex[OCI_DIGEST_HEX_MAX + 1]; + char l1_digest[OCI_DIGEST_HEX_MAX + 16]; + char l2_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, LAYER1, sizeof(LAYER1) - 1, l1_digest, + sizeof(l1_digest), l1_hex); + put_manifest_blob(blobs, LAYER2, sizeof(LAYER2) - 1, l2_digest, + sizeof(l2_digest), l2_hex); + + static const char CONFIG[] = "{\"architecture\":\"arm64\"}"; + char cfg_hex[OCI_DIGEST_HEX_MAX + 1]; + char cfg_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, CONFIG, sizeof(CONFIG) - 1, cfg_digest, + sizeof(cfg_digest), cfg_hex); + + size_t mlen = 0; + char *manifest = vformat( + &mlen, + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"config\":{" + "\"mediaType\":\"application/vnd.oci.image.config.v1+json\"," + "\"digest\":\"%s\",\"size\":%zu}," + "\"layers\":[" + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"%s\",\"size\":%zu}," + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + 
"\"digest\":\"%s\",\"size\":%zu}]}", + cfg_digest, sizeof(CONFIG) - 1, l1_digest, sizeof(LAYER1) - 1, + l2_digest, sizeof(LAYER2) - 1); + + char m_hex[OCI_DIGEST_HEX_MAX + 1]; + char m_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, manifest, mlen, m_digest, sizeof(m_digest), + m_hex); + + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse("alpine:3.20", &ref, &parse_err); + oci_store_put_ref(store, &ref, m_digest, NULL); + + inspect_result_t r; + run_inspect(store, &ref, NULL, &r); + + if (r.rc != 0) { + report_fail(name, "rc=%d errno=%d err=%s", r.rc, r.saved_errno, + r.err_msg ? r.err_msg : "(none)"); + } else if (!contains(r.out, "pinned:")) { + report_fail(name, "missing pinned line"); + } else if (!contains(r.out, m_digest)) { + report_fail(name, "missing manifest digest in output"); + } else if (!contains(r.out, "type: image manifest")) { + report_fail(name, "missing type line"); + } else if (!contains(r.out, "config:")) { + report_fail(name, "missing config line"); + } else if (!contains(r.out, "layers:")) { + report_fail(name, "missing layers section"); + } else if (!contains(r.out, "[0]")) { + report_fail(name, "missing layer index [0]"); + } else if (!contains(r.out, "[1]")) { + report_fail(name, "missing layer index [1]"); + } else if (contains(r.out, "[2]")) { + report_fail(name, "unexpected layer index [2]"); + } else { + report_pass(name); + } + + free(r.out); + free(manifest); + oci_ref_free(&ref); + oci_store_close(store); +} + +/* ── Helpers for index-based cases ───────────────────────────────── */ + +/* Three-platform index where linux/arm64/v8 references manifest_digest. The + * other two entries point at digests the test never stores; the renderer does + * not need them for the default-mode drill. 
+ */ +static char *build_index_three_platforms(size_t *out_len, + const char *arm64_digest, + size_t arm64_size) +{ + return vformat( + out_len, + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.index.v1+json\"," + "\"manifests\":[" + "{\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"digest\":\"sha256:1111111111111111111111111111111111111111111111111111111111111111\"," + "\"size\":1024," + "\"platform\":{\"architecture\":\"amd64\",\"os\":\"linux\"}}," + "{\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"digest\":\"%s\",\"size\":%zu," + "\"platform\":{\"architecture\":\"arm64\",\"os\":\"linux\"," + "\"variant\":\"v8\"}}," + "{\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"digest\":\"sha256:3333333333333333333333333333333333333333333333333333333333333333\"," + "\"size\":1024," + "\"platform\":{\"architecture\":\"s390x\",\"os\":\"linux\"}}]}", + arm64_digest, arm64_size); +} + +/* Build a minimal manifest body and persist it. Returns the manifest digest + * string (heap, caller frees) for the index to reference. 
+ */ +static char *build_and_store_manifest(oci_blob_store_t *blobs, size_t *out_len) +{ + static const char BODY[] = + "{\"schemaVersion\":2," + "\"mediaType\":\"application/vnd.oci.image.manifest.v1+json\"," + "\"config\":{" + "\"mediaType\":\"application/vnd.oci.image.config.v1+json\"," + "\"digest\":\"sha256:00000000000000000000000000000000000000000000" + "00000000000000000000\",\"size\":1}," + "\"layers\":[" + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"sha256:00000000000000000000000000000000000000000000" + "00000000000000000001\",\"size\":2}," + "{\"mediaType\":\"application/vnd.oci.image.layer.v1.tar+gzip\"," + "\"digest\":\"sha256:00000000000000000000000000000000000000000000" + "00000000000000000002\",\"size\":3}]}"; + size_t len = sizeof(BODY) - 1; + char hex[OCI_DIGEST_HEX_MAX + 1]; + char digest[OCI_DIGEST_HEX_MAX + 16]; + if (!put_manifest_blob(blobs, BODY, len, digest, sizeof(digest), hex)) + return NULL; + *out_len = len; + return strdup(digest); +} + +/* ── Case 2: index drills arm64 ──────────────────────────────────── */ + +static void case_index_default_drills_arm64(const char *scratch) +{ + const char *name = "inspect: index drills linux/arm64 manifest"; + char root[1024]; + snprintf(root, sizeof(root), "%s/case-idx-default", scratch); + oci_store_t *store = oci_store_open(root); + oci_blob_store_t *blobs = oci_store_blobs(store); + + size_t m_len = 0; + char *m_digest = build_and_store_manifest(blobs, &m_len); + + size_t idx_len = 0; + char *idx_body = build_index_three_platforms(&idx_len, m_digest, m_len); + char idx_hex[OCI_DIGEST_HEX_MAX + 1]; + char idx_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, idx_body, idx_len, idx_digest, sizeof(idx_digest), + idx_hex); + + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse("alpine:3.20", &ref, &parse_err); + oci_store_put_ref(store, &ref, idx_digest, NULL); + + inspect_result_t r; + run_inspect(store, &ref, NULL, &r); + + if (r.rc 
!= 0) { + report_fail(name, "rc=%d errno=%d err=%s", r.rc, r.saved_errno, + r.err_msg ? r.err_msg : "(none)"); + } else if (!contains(r.out, "type: image index")) { + report_fail(name, "missing type=index line"); + } else if (!contains(r.out, "platforms:")) { + report_fail(name, "missing platforms section"); + } else if (!contains(r.out, "[arm64]")) { + report_fail(name, "missing [arm64] tag"); + } else if (!contains(r.out, "linux/arm64/v8")) { + report_fail(name, "missing linux/arm64/v8 platform string"); + } else if (contains(r.out, "linux/amd64")) { + report_fail(name, "amd64 listed in default mode (should be hidden)"); + } else if (!contains(r.out, "manifest:")) { + report_fail(name, "missing drill manifest section"); + } else if (!contains(r.out, "config:")) { + report_fail(name, "missing config line from drill"); + } else if (!contains(r.out, "layers:")) { + report_fail(name, "missing layers section from drill"); + } else { + report_pass(name); + } + + free(r.out); + free(m_digest); + free(idx_body); + oci_ref_free(&ref); + oci_store_close(store); +} + +/* ── Case 3: index --all-platforms ───────────────────────────────── */ + +static void case_index_all_platforms(const char *scratch) +{ + const char *name = "inspect: --all-platforms lists every entry, no drill"; + char root[1024]; + snprintf(root, sizeof(root), "%s/case-idx-all", scratch); + oci_store_t *store = oci_store_open(root); + oci_blob_store_t *blobs = oci_store_blobs(store); + + size_t m_len = 0; + char *m_digest = build_and_store_manifest(blobs, &m_len); + size_t idx_len = 0; + char *idx_body = build_index_three_platforms(&idx_len, m_digest, m_len); + char idx_hex[OCI_DIGEST_HEX_MAX + 1]; + char idx_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, idx_body, idx_len, idx_digest, sizeof(idx_digest), + idx_hex); + + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse("alpine:3.20", &ref, &parse_err); + oci_store_put_ref(store, &ref, idx_digest, NULL); + + 
oci_inspect_options_t opts = {.show_all_platforms = true}; + inspect_result_t r; + run_inspect(store, &ref, &opts, &r); + + if (r.rc != 0) { + report_fail(name, "rc=%d errno=%d err=%s", r.rc, r.saved_errno, + r.err_msg ? r.err_msg : "(none)"); + } else if (!contains(r.out, "linux/amd64")) { + report_fail(name, "missing linux/amd64 entry"); + } else if (!contains(r.out, "linux/arm64/v8")) { + report_fail(name, "missing linux/arm64/v8 entry"); + } else if (!contains(r.out, "linux/s390x")) { + report_fail(name, "missing linux/s390x entry"); + } else if (!contains(r.out, "[arm64]")) { + report_fail(name, "missing [arm64] tag"); + } else if (contains(r.out, "manifest:")) { + /* The drill section starts with "manifest:". --all-platforms must + * not include it. + */ + report_fail(name, "drill section unexpectedly present"); + } else { + report_pass(name); + } + + free(r.out); + free(m_digest); + free(idx_body); + oci_ref_free(&ref); + oci_store_close(store); +} + +/* ── Case 4: pin miss ────────────────────────────────────────────── */ + +static void case_pin_miss(const char *scratch) +{ + const char *name = "inspect: pin miss prints informational line, rc=0"; + char root[1024]; + snprintf(root, sizeof(root), "%s/case-miss", scratch); + oci_store_t *store = oci_store_open(root); + + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse("alpine:never-pulled", &ref, &parse_err); + + inspect_result_t r; + run_inspect(store, &ref, NULL, &r); + + if (r.rc != 0) { + report_fail(name, "rc=%d (expected 0)", r.rc); + } else if (!contains(r.out, "(no local manifest")) { + report_fail(name, "missing informational text"); + } else { + report_pass(name); + } + + free(r.out); + oci_ref_free(&ref); + oci_store_close(store); +} + +/* ── Case 5: digest ref but blob missing ─────────────────────────── */ + +static void case_digest_blob_missing(const char *scratch) +{ + const char *name = "inspect: digest ref with missing blob errors out"; + char root[1024]; + snprintf(root, 
sizeof(root), "%s/case-digest-missing", scratch); + oci_store_t *store = oci_store_open(root); + + /* Use a synthetic digest that the store has never seen. */ + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse( + "alpine@sha256:00000000000000000000000000000000000000000000000000000000" + "00000000", + &ref, &parse_err); + + inspect_result_t r; + run_inspect(store, &ref, NULL, &r); + + if (r.rc != -1) { + report_fail(name, "rc=%d (expected -1)", r.rc); + } else if (r.saved_errno != ENOENT) { + report_fail(name, "errno=%d (expected ENOENT)", r.saved_errno); + } else if (!contains(r.out, "error: manifest blob")) { + report_fail(name, "missing error line on stdout"); + } else if (!contains(r.out, "(digest reference)")) { + report_fail(name, "missing digest reference annotation"); + } else { + report_pass(name); + } + + free(r.out); + oci_ref_free(&ref); + oci_store_close(store); +} + +/* ── Case 6: index ok, sub-manifest missing ──────────────────────── */ + +static void case_sub_manifest_missing(const char *scratch) +{ + const char *name = + "inspect: index ok but sub-manifest blob missing -> rc=-1, table" + " still shown"; + char root[1024]; + snprintf(root, sizeof(root), "%s/case-sub-missing", scratch); + oci_store_t *store = oci_store_open(root); + oci_blob_store_t *blobs = oci_store_blobs(store); + + /* Reference a manifest digest that is NOT in the store. 
*/ + static const char ABSENT[] = + "sha256:dead00000000000000000000000000000000000000000000000000000000be" + "ef"; + size_t idx_len = 0; + char *idx_body = build_index_three_platforms(&idx_len, ABSENT, 1024); + char idx_hex[OCI_DIGEST_HEX_MAX + 1]; + char idx_digest[OCI_DIGEST_HEX_MAX + 16]; + put_manifest_blob(blobs, idx_body, idx_len, idx_digest, sizeof(idx_digest), + idx_hex); + + oci_ref_t ref = {0}; + const char *parse_err = NULL; + oci_ref_parse("alpine:3.20", &ref, &parse_err); + oci_store_put_ref(store, &ref, idx_digest, NULL); + + /* Redirect stderr to /dev/null so the warning line does not pollute the + * test driver output. The function under test still writes the warning; + * scripts key on rc + errno. + */ + int saved_stderr = dup(STDERR_FILENO); + int devnull = open("/dev/null", O_WRONLY); + if (devnull >= 0) { + dup2(devnull, STDERR_FILENO); + close(devnull); + } + + inspect_result_t r; + run_inspect(store, &ref, NULL, &r); + + if (saved_stderr >= 0) { + dup2(saved_stderr, STDERR_FILENO); + close(saved_stderr); + } + + if (r.rc != -1) { + report_fail(name, "rc=%d (expected -1)", r.rc); + } else if (r.saved_errno != ENOENT) { + report_fail(name, "errno=%d (expected ENOENT)", r.saved_errno); + } else if (!contains(r.out, "platforms:")) { + report_fail(name, "platform table not on stdout"); + } else if (!contains(r.out, "[arm64]")) { + report_fail(name, "[arm64] tag missing"); + } else if (!r.err_msg || + !contains(r.err_msg, "indexed manifest blob missing")) { + report_fail(name, "err_msg unexpected: %s", + r.err_msg ? 
r.err_msg : "(null)"); + } else { + report_pass(name); + } + + free(r.out); + free(idx_body); + oci_ref_free(&ref); + oci_store_close(store); +} + +int main(void) +{ + char *scratch = make_scratch_root(); + if (!scratch) { + fprintf(stderr, "scratch root mkdtemp failed: %s\n", strerror(errno)); + return 1; + } + printf("OCI inspect unit tests (scratch=%s)\n", scratch); + + case_direct_manifest(scratch); + case_index_default_drills_arm64(scratch); + case_index_all_platforms(scratch); + case_pin_miss(scratch); + case_digest_blob_missing(scratch); + case_sub_manifest_missing(scratch); + + wipe_dir(scratch); + free(scratch); + + printf("\nResults: %d/%d passed\n", g_passed, g_total); + return g_passed == g_total ? 0 : 1; +}