From b66dd64fa690bcd7606182ae928e9ec4e393e34e Mon Sep 17 00:00:00 2001
From: Danny Canter <danny_canter@apple.com>
Date: Thu, 19 Mar 2026 16:00:06 -0700
Subject: [PATCH] Add Seccomp support

This change is rather large, but I think it's simpler to get in as
one unit. It:

- Adds a new ContainerizationSeccomp target/product that is a cBPF compiler
specifically for seccomp. Its main use is to take in an OCI seccomp description
and spit out a filter we can apply.
- Adds a new friendly SeccompProfile API to Containerization to specify what filters
you'd like applied. This will (as is the case for basically everything else) get
translated to OCI behind the scenes.
- Adds a small bit of logic in vmexec to apply the filters.

It also adds unit and integration tests for everything. Unit testing is interesting: I've added
a small BPF simulator so we have some semblance of testing beyond integration tests that only
check whether a syscall is blocked or returns the expected value.
---
 Package.swift                                 |  14 +
 Sources/Containerization/LinuxContainer.swift |   5 +
 Sources/Containerization/LinuxPod.swift       |   3 +
 Sources/Containerization/SeccompProfile.swift | 808 ++++++++++++++++++
 .../SeccompCompiler+Aarch64.swift             | 328 +++++++
 .../SeccompCompiler.swift                     | 421 +++++++++
 Sources/Integration/ContainerTests.swift      | 306 +++++++
 Sources/Integration/Suite.swift               |   8 +
 .../BPFSimulator.swift                        | 139 +++
 .../SeccompCompilerTests.swift                | 369 ++++++++
 vminitd/Package.swift                         |   1 +
 vminitd/Sources/LCShim/include/syscall.h      |  14 +
 vminitd/Sources/LCShim/syscall.c              |  12 +
 vminitd/Sources/vmexec/ExecCommand.swift      |  32 +-
 vminitd/Sources/vmexec/RunCommand.swift       |  17 +-
 vminitd/Sources/vmexec/vmexec.swift           |  53 +-
 vminitd/Sources/vminitd/ManagedProcess.swift  |   2 +
 17 files changed, 2514 insertions(+), 18 deletions(-)
 create mode 100644 Sources/Containerization/SeccompProfile.swift
 create mode 100644 Sources/ContainerizationSeccomp/SeccompCompiler+Aarch64.swift
 create mode 100644 Sources/ContainerizationSeccomp/SeccompCompiler.swift
 create mode 100644 Tests/ContainerizationSeccompTests/BPFSimulator.swift
 create mode 100644 Tests/ContainerizationSeccompTests/SeccompCompilerTests.swift

diff --git a/Package.swift b/Package.swift
index 40997523..99e28965 100644
--- a/Package.swift
+++ b/Package.swift
@@ -33,6 +33,7 @@ let package = Package(
         .library(name: "ContainerizationOS", targets: ["ContainerizationOS"]),
         .library(name: "ContainerizationExtras", targets: ["ContainerizationExtras"]),
         .library(name: "ContainerizationArchive", targets: ["ContainerizationArchive"]),
+        .library(name: "ContainerizationSeccomp", targets: ["ContainerizationSeccomp"]),
         .executable(name: "cctl", targets: ["cctl"]),
     ],
     dependencies: [
@@ -264,5 +265,18 @@ let package = Package(
         .target(
             name: "CShim"
         ),
+        .target(
+            name: "ContainerizationSeccomp",
+            dependencies: [
+                "ContainerizationOCI"
+            ]
+        ),
+        .testTarget(
+            name: "ContainerizationSeccompTests",
+            dependencies: [
+                "ContainerizationSeccomp",
+                "ContainerizationOCI",
+            ]
+        ),
     ]
 )
diff --git a/Sources/Containerization/LinuxContainer.swift b/Sources/Containerization/LinuxContainer.swift
index 9033a206..d412e532 100644
--- a/Sources/Containerization/LinuxContainer.swift
+++ b/Sources/Containerization/LinuxContainer.swift
@@ -64,6 +64,8 @@ public final class LinuxContainer: Container, Sendable {
         public var sockets: [UnixSocketConfiguration] = []
         /// The mounts for the container.
         public var mounts: [Mount] = LinuxContainer.defaultMounts()
+        /// Seccomp profile for system call filtering.
+        public var seccomp: SeccompProfile?
         /// The DNS configuration for the container.
         public var dns: DNS?
         /// The hosts to add to /etc/hosts for the container.
@@ -90,6 +92,7 @@ public final class LinuxContainer: Container, Sendable {
             interfaces: [any Interface] = [],
             sockets: [UnixSocketConfiguration] = [],
             mounts: [Mount] = LinuxContainer.defaultMounts(),
+            seccomp: SeccompProfile? = nil,
             dns: DNS? = nil,
             hosts: Hosts? = nil,
             virtualization: Bool = false,
@@ -105,6 +108,7 @@ public final class LinuxContainer: Container, Sendable {
             self.interfaces = interfaces
             self.sockets = sockets
             self.mounts = mounts
+            self.seccomp = seccomp
             self.dns = dns
             self.hosts = hosts
             self.virtualization = virtualization
@@ -356,6 +360,7 @@ public final class LinuxContainer: Container, Sendable {
 
         // Linux toggles.
         spec.linux?.sysctl = config.sysctl
+        spec.linux?.seccomp = config.seccomp?.toOCI(effectiveCapabilities: config.process.capabilities.effective)
 
         // If the rootfs was requested as read-only, set it in the OCI spec.
         // We let the OCI runtime remount as ro, instead of doing it originally.
diff --git a/Sources/Containerization/LinuxPod.swift b/Sources/Containerization/LinuxPod.swift
index 7574d697..619b8991 100644
--- a/Sources/Containerization/LinuxPod.swift
+++ b/Sources/Containerization/LinuxPod.swift
@@ -78,6 +78,8 @@ public final class LinuxPod: Sendable {
         public var sysctl: [String: String] = [:]
         /// The mounts for the container.
         public var mounts: [Mount] = LinuxContainer.defaultMounts()
+        /// Seccomp profile for system call filtering.
+        public var seccomp: SeccompProfile?
         /// The Unix domain socket relays to setup for the container.
         public var sockets: [UnixSocketConfiguration] = []
         /// The DNS configuration for the container.
@@ -230,6 +232,7 @@ public final class LinuxPod: Sendable {
 
         // Linux toggles
         spec.linux?.sysctl = config.sysctl
+        spec.linux?.seccomp = config.seccomp?.toOCI(effectiveCapabilities: config.process.capabilities.effective)
 
         // If the rootfs was requested as read-only, set it in the OCI spec.
         // We let the OCI runtime remount as ro, instead of doing it originally.
diff --git a/Sources/Containerization/SeccompProfile.swift b/Sources/Containerization/SeccompProfile.swift
new file mode 100644
index 00000000..a89c1962
--- /dev/null
+++ b/Sources/Containerization/SeccompProfile.swift
@@ -0,0 +1,808 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+import ContainerizationOCI
+import ContainerizationOS
+
+/// Seccomp configuration.
+public struct SeccompProfile: Sendable {
+    /// The action to take for syscalls not matched by any rule.
+    public var defaultAction: Action
+    /// Architecture constraints.
+    public var architectures: [Architecture]
+    /// Seccomp filter flags.
+    public var flags: [Flag]
+    /// Per-syscall rules.
+    public var syscalls: [Rule]
+
+    /// The action to take for a matching syscall.
+    ///
+    /// Note: `SECCOMP_RET_TRACE` and `SECCOMP_RET_USER_NOTIF` are not
+    /// supported. Both require a host-side (in our case, guest-side) process
+    /// (a ptrace tracer or a seccomp notify listener).
+    public enum Action: Sendable {
+        case allow
+        case kill
+        case killProcess
+        case trap
+        case errno(UInt)
+        case log
+    }
+
+    /// Supported architectures.
+    public enum Architecture: Sendable {
+        case aarch64
+    }
+
+    /// Seccomp filter flags.
+    public enum Flag: Sendable {
+        case log
+        case specAllow
+        case waitKillableRecv
+    }
+
+    /// Identifies a syscall by name or raw number.
+    public enum Syscall: Sendable, ExpressibleByStringLiteral {
+        /// A syscall name (e.g. "mkdirat"). Must match an entry in the
+        /// aarch64 syscall table; unknown names are silently skipped.
+        case name(String)
+        /// A raw syscall number. Useful for newly added syscalls.
+        case number(UInt32)
+
+        public init(stringLiteral value: String) {
+            self = .name(value)
+        }
+    }
+
+    /// A rule that matches one or more syscalls.
+    public struct Rule: Sendable {
+        /// The syscalls this rule matches.
+        public var syscalls: [Syscall]
+        /// The action to take when matched.
+        public var action: Action
+        /// Optional argument conditions (all must match).
+        public var args: [ArgCondition]
+        /// Capabilities required for this rule to be included in the filter.
+        /// If non-empty, the rule is only emitted when the container has all
+        /// of these capabilities in its effective set.
+        public var requiredCapabilities: [CapabilityName]
+
+        public init(
+            syscalls: [Syscall],
+            action: Action,
+            args: [ArgCondition] = [],
+            requiredCapabilities: [CapabilityName] = []
+        ) {
+            self.syscalls = syscalls
+            self.action = action
+            self.args = args
+            self.requiredCapabilities = requiredCapabilities
+        }
+    }
+
+    /// A condition on a syscall argument.
+    public struct ArgCondition: Sendable {
+        /// The argument index (0-5).
+        public var index: UInt
+        /// The comparison operator.
+        public var op: Operator
+        /// The value to compare against. For `maskedEqual`, this is the bit mask.
+        public var value: UInt64
+        /// Second value. For `maskedEqual`, the expected result of `arg & value`.
+        public var valueTwo: UInt64
+
+        /// Comparison operators for argument conditions.
+        public enum Operator: Sendable {
+            case equalTo
+            case notEqual
+            case lessThan
+            case lessEqual
+            case greaterThan
+            case greaterEqual
+            case maskedEqual
+        }
+
+        public init(index: UInt, op: Operator, value: UInt64, valueTwo: UInt64 = 0) {
+            self.index = index
+            self.op = op
+            self.value = value
+            self.valueTwo = valueTwo
+        }
+    }
+
+    /// A profile that allows all syscalls.
+    public static let allowAll = SeccompProfile(defaultAction: .allow)
+
+    /// Default seccomp profile for unprivileged containers.
+    public static let defaultProfile: SeccompProfile = {
+        var profile = SeccompProfile(defaultAction: .errno(1))
+        profile.syscalls = [
+            // Unconditional allowlist.
+            Rule(
+                syscalls: [
+                    "accept",
+                    "accept4",
+                    "access",
+                    "adjtimex",
+                    "alarm",
+                    "bind",
+                    "brk",
+                    "cachestat",
+                    "capget",
+                    "capset",
+                    "chdir",
+                    "chmod",
+                    "chown",
+                    "chown32",
+                    "clock_adjtime",
+                    "clock_adjtime64",
+                    "clock_getres",
+                    "clock_getres_time64",
+                    "clock_gettime",
+                    "clock_gettime64",
+                    "clock_nanosleep",
+                    "clock_nanosleep_time64",
+                    "close",
+                    "close_range",
+                    "connect",
+                    "copy_file_range",
+                    "creat",
+                    "dup",
+                    "dup2",
+                    "dup3",
+                    "epoll_create",
+                    "epoll_create1",
+                    "epoll_ctl",
+                    "epoll_ctl_old",
+                    "epoll_pwait",
+                    "epoll_pwait2",
+                    "epoll_wait",
+                    "epoll_wait_old",
+                    "eventfd",
+                    "eventfd2",
+                    "execve",
+                    "execveat",
+                    "exit",
+                    "exit_group",
+                    "faccessat",
+                    "faccessat2",
+                    "fadvise64",
+                    "fadvise64_64",
+                    "fallocate",
+                    "fanotify_mark",
+                    "fchdir",
+                    "fchmod",
+                    "fchmodat",
+                    "fchmodat2",
+                    "fchown",
+                    "fchown32",
+                    "fchownat",
+                    "fcntl",
+                    "fcntl64",
+                    "fdatasync",
+                    "fgetxattr",
+                    "flistxattr",
+                    "flock",
+                    "fork",
+                    "fremovexattr",
+                    "fsetxattr",
+                    "fstat",
+                    "fstat64",
+                    "fstatat",
+                    "fstatat64",
+                    "fstatfs",
+                    "fstatfs64",
+                    "fsync",
+                    "ftruncate",
+                    "ftruncate64",
+                    "futex",
+                    "futex_requeue",
+                    "futex_time64",
+                    "futex_wait",
+                    "futex_waitv",
+                    "futex_wake",
+                    "futimesat",
+                    "getcpu",
+                    "getcwd",
+                    "getdents",
+                    "getdents64",
+                    "getegid",
+                    "getegid32",
+                    "geteuid",
+                    "geteuid32",
+                    "getgid",
+                    "getgid32",
+                    "getgroups",
+                    "getgroups32",
+                    "getitimer",
+                    "getpeername",
+                    "getpgid",
+                    "getpgrp",
+                    "getpid",
+                    "getppid",
+                    "getpriority",
+                    "getrandom",
+                    "getresgid",
+                    "getresgid32",
+                    "getresuid",
+                    "getresuid32",
+                    "getrlimit",
+                    "get_robust_list",
+                    "getrusage",
+                    "getsid",
+                    "getsockname",
+                    "getsockopt",
+                    "get_thread_area",
+                    "gettid",
+                    "gettimeofday",
+                    "getuid",
+                    "getuid32",
+                    "getxattr",
+                    "getxattrat",
+                    "inotify_add_watch",
+                    "inotify_init",
+                    "inotify_init1",
+                    "inotify_rm_watch",
+                    "io_cancel",
+                    "ioctl",
+                    "io_destroy",
+                    "io_getevents",
+                    "io_pgetevents",
+                    "io_pgetevents_time64",
+                    "ioprio_get",
+                    "ioprio_set",
+                    "io_setup",
+                    "io_submit",
+                    "ipc",
+                    "add_key",
+                    "keyctl",
+                    "kill",
+                    "landlock_add_rule",
+                    "landlock_create_ruleset",
+                    "landlock_restrict_self",
+                    "lchown",
+                    "lchown32",
+                    "lgetxattr",
+                    "link",
+                    "linkat",
+                    "listen",
+                    "listmount",
+                    "listxattr",
+                    "listxattrat",
+                    "llistxattr",
+                    "_llseek",
+                    "lremovexattr",
+                    "lseek",
+                    "lsetxattr",
+                    "lstat",
+                    "lstat64",
+                    "madvise",
+                    "map_shadow_stack",
+                    "membarrier",
+                    "memfd_create",
+                    "memfd_secret",
+                    "mincore",
+                    "mkdir",
+                    "mkdirat",
+                    "mknod",
+                    "mknodat",
+                    "mlock",
+                    "mlock2",
+                    "mlockall",
+                    "mmap",
+                    "mmap2",
+                    "mprotect",
+                    "mq_getsetattr",
+                    "mq_notify",
+                    "mq_open",
+                    "mq_timedreceive",
+                    "mq_timedreceive_time64",
+                    "mq_timedsend",
+                    "mq_timedsend_time64",
+                    "mq_unlink",
+                    "mremap",
+                    "mseal",
+                    "msgctl",
+                    "msgget",
+                    "msgrcv",
+                    "msgsnd",
+                    "msync",
+                    "munlock",
+                    "munlockall",
+                    "munmap",
+                    "name_to_handle_at",
+                    "nanosleep",
+                    "newfstatat",
+                    "_newselect",
+                    "open",
+                    "openat",
+                    "openat2",
+                    "pause",
+                    "pidfd_open",
+                    "pidfd_send_signal",
+                    "pipe",
+                    "pipe2",
+                    "pkey_alloc",
+                    "pkey_free",
+                    "pkey_mprotect",
+                    "poll",
+                    "ppoll",
+                    "ppoll_time64",
+                    "prctl",
+                    "pread64",
+                    "preadv",
+                    "preadv2",
+                    "prlimit64",
+                    "process_mrelease",
+                    "pselect6",
+                    "pselect6_time64",
+                    "pwrite64",
+                    "pwritev",
+                    "pwritev2",
+                    "read",
+                    "readahead",
+                    "readlink",
+                    "readlinkat",
+                    "readv",
+                    "recv",
+                    "recvfrom",
+                    "recvmmsg",
+                    "recvmmsg_time64",
+                    "recvmsg",
+                    "remap_file_pages",
+                    "removexattr",
+                    "removexattrat",
+                    "rename",
+                    "renameat",
+                    "renameat2",
+                    "request_key",
+                    "restart_syscall",
+                    "rmdir",
+                    "rseq",
+                    "rt_sigaction",
+                    "rt_sigpending",
+                    "rt_sigprocmask",
+                    "rt_sigqueueinfo",
+                    "rt_sigreturn",
+                    "rt_sigsuspend",
+                    "rt_sigtimedwait",
+                    "rt_sigtimedwait_time64",
+                    "rt_tgsigqueueinfo",
+                    "sched_getaffinity",
+                    "sched_getattr",
+                    "sched_getparam",
+                    "sched_get_priority_max",
+                    "sched_get_priority_min",
+                    "sched_getscheduler",
+                    "sched_rr_get_interval",
+                    "sched_rr_get_interval_time64",
+                    "sched_setaffinity",
+                    "sched_setattr",
+                    "sched_setparam",
+                    "sched_setscheduler",
+                    "sched_yield",
+                    "seccomp",
+                    "select",
+                    "semctl",
+                    "semget",
+                    "semop",
+                    "semtimedop",
+                    "semtimedop_time64",
+                    "send",
+                    "sendfile",
+                    "sendfile64",
+                    "sendmmsg",
+                    "sendmsg",
+                    "sendto",
+                    "setfsgid",
+                    "setfsgid32",
+                    "setfsuid",
+                    "setfsuid32",
+                    "setgid",
+                    "setgid32",
+                    "setgroups",
+                    "setgroups32",
+                    "setitimer",
+                    "setpgid",
+                    "setpriority",
+                    "setregid",
+                    "setregid32",
+                    "setresgid",
+                    "setresgid32",
+                    "setresuid",
+                    "setresuid32",
+                    "setreuid",
+                    "setreuid32",
+                    "setrlimit",
+                    "set_robust_list",
+                    "setsid",
+                    "setsockopt",
+                    "set_thread_area",
+                    "set_tid_address",
+                    "setuid",
+                    "setuid32",
+                    "setxattr",
+                    "setxattrat",
+                    "shmat",
+                    "shmctl",
+                    "shmdt",
+                    "shmget",
+                    "shutdown",
+                    "sigaltstack",
+                    "signalfd",
+                    "signalfd4",
+                    "sigprocmask",
+                    "sigreturn",
+                    "socketcall",
+                    "socketpair",
+                    "splice",
+                    "stat",
+                    "stat64",
+                    "statfs",
+                    "statfs64",
+                    "statmount",
+                    "statx",
+                    "symlink",
+                    "symlinkat",
+                    "sync",
+                    "sync_file_range",
+                    "syncfs",
+                    "sysinfo",
+                    "tee",
+                    "tgkill",
+                    "time",
+                    "timer_create",
+                    "timer_delete",
+                    "timer_getoverrun",
+                    "timer_gettime",
+                    "timer_gettime64",
+                    "timer_settime",
+                    "timer_settime64",
+                    "timerfd_create",
+                    "timerfd_gettime",
+                    "timerfd_gettime64",
+                    "timerfd_settime",
+                    "timerfd_settime64",
+                    "times",
+                    "tkill",
+                    "truncate",
+                    "truncate64",
+                    "ugetrlimit",
+                    "umask",
+                    "uname",
+                    "unlink",
+                    "unlinkat",
+                    "uretprobe",
+                    "utime",
+                    "utimensat",
+                    "utimensat_time64",
+                    "utimes",
+                    "vfork",
+                    "vmsplice",
+                    "wait4",
+                    "waitid",
+                    "waitpid",
+                    "write",
+                    "writev",
+
+                    // arm/arm64 specific
+                    "arm_fadvise64_64",
+                    "arm_sync_file_range",
+                    "sync_file_range2",
+                    "breakpoint",
+                    "cacheflush",
+                    "set_tls",
+                ], action: .allow),
+
+            // socket: allow all address families except AF_VSOCK (40).
+            Rule(
+                syscalls: ["socket"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .notEqual, value: 40)]
+            ),
+
+            // personality: only allow specific execution domains.
+            //   0x0        = PER_LINUX (default)
+            //   0x8        = ADDR_NO_RANDOMIZE
+            //   0x20000    = UNAME26 (report kernel as 2.6.x)
+            //   0x20008    = UNAME26 | ADDR_NO_RANDOMIZE
+            //   0xFFFFFFFF = query current personality
+            Rule(
+                syscalls: ["personality"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .equalTo, value: 0x0)]
+            ),
+            Rule(
+                syscalls: ["personality"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .equalTo, value: 0x8)]
+            ),
+            Rule(
+                syscalls: ["personality"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .equalTo, value: 0x20000)]
+            ),
+            Rule(
+                syscalls: ["personality"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .equalTo, value: 0x20008)]
+            ),
+            Rule(
+                syscalls: ["personality"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .equalTo, value: 0xFFFF_FFFF)]
+            ),
+
+            // clone: allow only if no namespace creation flags are set.
+            // The mask 0x7E020000 (2114060288) covers CLONE_NEWNS | CLONE_NEWCGROUP |
+            // CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWPID |
+            // CLONE_NEWNET. If (flags & mask) == 0, no namespaces are being
+            // created and the clone is safe.
+            Rule(
+                syscalls: ["clone"],
+                action: .allow,
+                args: [ArgCondition(index: 0, op: .maskedEqual, value: 2_114_060_288, valueTwo: 0)]
+            ),
+
+            // clone3: return ENOSYS (38) to force glibc/musl to fall back to
+            // clone, whose flags the arg filter above can inspect. clone3 passes
+            // flags in a struct, which BPF cannot read. NOTE(review): the
+            // CAP_SYS_ADMIN rule below also lists clone3; confirm which one wins.
+            Rule(syscalls: ["clone3"], action: .errno(38)),
+
+            // Capability-gated rules. These are only included when the
+            // container has the required capability in its effective set.
+
+            // CAP_DAC_READ_SEARCH
+            Rule(
+                syscalls: ["open_by_handle_at"],
+                action: .allow,
+                requiredCapabilities: [.dacReadSearch]
+            ),
+
+            // CAP_SYS_ADMIN: allow clone/clone3 without namespace flag
+            // restrictions, plus mount/namespace/admin syscalls.
+            Rule(
+                syscalls: [
+                    "bpf", "clone", "clone3", "fanotify_init",
+                    "fsconfig", "fsmount", "fsopen", "fspick",
+                    "lookup_dcookie",
+                    "mount", "mount_setattr", "move_mount", "open_tree",
+                    "perf_event_open",
+                    "quotactl", "quotactl_fd",
+                    "setdomainname", "sethostname", "setns",
+                    "syslog",
+                    "umount", "umount2", "unshare",
+                ],
+                action: .allow,
+                requiredCapabilities: [.sysAdmin]
+            ),
+
+            // CAP_SYS_BOOT
+            Rule(
+                syscalls: ["reboot"],
+                action: .allow,
+                requiredCapabilities: [.sysBoot]
+            ),
+
+            // CAP_SYS_CHROOT
+            Rule(
+                syscalls: ["chroot"],
+                action: .allow,
+                requiredCapabilities: [.sysChroot]
+            ),
+
+            // CAP_SYS_MODULE
+            Rule(
+                syscalls: ["delete_module", "init_module", "finit_module"],
+                action: .allow,
+                requiredCapabilities: [.sysModule]
+            ),
+
+            // CAP_SYS_PACCT
+            Rule(
+                syscalls: ["acct"],
+                action: .allow,
+                requiredCapabilities: [.sysPacct]
+            ),
+
+            // CAP_SYS_PTRACE
+            Rule(
+                syscalls: [
+                    "kcmp", "pidfd_getfd", "process_madvise",
+                    "process_vm_readv", "process_vm_writev", "ptrace",
+                ],
+                action: .allow,
+                requiredCapabilities: [.sysPtrace]
+            ),
+
+            // CAP_SYS_RAWIO
+            Rule(
+                syscalls: ["iopl", "ioperm"],
+                action: .allow,
+                requiredCapabilities: [.sysRawio]
+            ),
+
+            // CAP_SYS_TIME
+            Rule(
+                syscalls: ["settimeofday", "stime", "clock_settime", "clock_settime64"],
+                action: .allow,
+                requiredCapabilities: [.sysTime]
+            ),
+
+            // CAP_SYS_TTY_CONFIG
+            Rule(
+                syscalls: ["vhangup"],
+                action: .allow,
+                requiredCapabilities: [.sysTtyConfig]
+            ),
+
+            // CAP_SYS_NICE
+            Rule(
+                syscalls: ["get_mempolicy", "mbind", "set_mempolicy", "set_mempolicy_home_node"],
+                action: .allow,
+                requiredCapabilities: [.sysNice]
+            ),
+
+            // CAP_SYSLOG
+            Rule(
+                syscalls: ["syslog"],
+                action: .allow,
+                requiredCapabilities: [.syslog]
+            ),
+
+            // CAP_BPF
+            Rule(
+                syscalls: ["bpf"],
+                action: .allow,
+                requiredCapabilities: [.bpf]
+            ),
+
+            // CAP_PERFMON
+            Rule(
+                syscalls: ["perf_event_open"],
+                action: .allow,
+                requiredCapabilities: [.perfmon]
+            ),
+        ]
+        return profile
+    }()
+
+    public init(defaultAction: Action) {
+        self.defaultAction = defaultAction
+        self.architectures = [.aarch64]
+        self.flags = []
+        self.syscalls = []
+    }
+
+    /// Appends a single rule that permits every one of the named syscalls.
+    public mutating func allow(_ names: String...) {
+        syscalls.append(Rule(syscalls: names.map(Syscall.name), action: .allow))
+    }
+
+    /// Appends a single rule that fails each named syscall with `errnoVal`.
+    public mutating func errno(_ errnoVal: UInt, _ names: String...) {
+        syscalls.append(Rule(syscalls: names.map(Syscall.name), action: .errno(errnoVal)))
+    }
+
+    /// Translates this profile into the OCI `LinuxSeccomp` type for transport.
+    ///
+    /// - Parameter effectiveCapabilities: The container's effective capability set.
+    ///   A rule is emitted only if every capability it requires is in this set.
+    public func toOCI(effectiveCapabilities: [CapabilityName] = []) -> ContainerizationOCI.LinuxSeccomp {
+        let granted = Set(effectiveCapabilities)
+        // A rule with no required capabilities is trivially a subset, so it is always kept.
+        let emittedRules = syscalls
+            .filter { granted.isSuperset(of: $0.requiredCapabilities) }
+            .map { $0.toOCI() }
+        return ContainerizationOCI.LinuxSeccomp(
+            defaultAction: defaultAction.toOCI(),
+            defaultErrnoRet: defaultAction.ociErrnoRet,
+            architectures: architectures.map { $0.toOCI() },
+            flags: flags.map { $0.toOCI() },
+            listenerPath: "",
+            listenerMetadata: "",
+            syscalls: emittedRules
+        )
+    }
+}
+
+extension SeccompProfile.Action {
+    /// Maps the action to its OCI counterpart; an errno payload is dropped here.
+    func toOCI() -> LinuxSeccompAction {
+        switch self {
+        case .allow: return .actAllow
+        case .kill: return .actKill
+        case .killProcess: return .actKillProcess
+        case .trap: return .actTrap
+        case .errno: return .actErrno
+        case .log: return .actLog
+        }
+    }
+
+    /// The errno carried by `.errno`, or `nil` for every other action.
+    var ociErrnoRet: UInt? {
+        guard case .errno(let code) = self else { return nil }
+        return code
+    }
+}
+
+extension SeccompProfile.Architecture {
+    func toOCI() -> Arch {
+        switch self {
+        case .aarch64: return .archAARCH64
+        }
+    }
+}
+
+extension SeccompProfile.Flag {
+    func toOCI() -> LinuxSeccompFlag {
+        switch self {
+        case .log: return .flagLog
+        case .specAllow: return .flagSpecAllow
+        case .waitKillableRecv: return .flagWaitKillableRecv
+        }
+    }
+}
+
+extension SeccompProfile.Rule {
+    /// Converts this rule to its OCI `LinuxSyscall` representation.
+    ///
+    /// Syscalls given by name are passed through unchanged; syscalls given by
+    /// raw number are encoded as their decimal string. The errno payload is
+    /// taken from `Action.ociErrnoRet`, so it is non-nil only for `.errno`.
+    func toOCI() -> LinuxSyscall {
+        let names = syscalls.map { syscall -> String in
+            switch syscall {
+            case .name(let name): return name
+            case .number(let number): return String(number)
+            }
+        }
+
+        return LinuxSyscall(
+            names: names,
+            action: action.toOCI(),
+            // Reuse the shared accessor rather than re-deriving the errno here.
+            errnoRet: action.ociErrnoRet,
+            args: args.map { $0.toOCI() }
+        )
+    }
+}
+
+extension SeccompProfile.ArgCondition {
+    func toOCI() -> LinuxSeccompArg {
+        LinuxSeccompArg(
+            index: index,
+            value: value,
+            valueTwo: valueTwo,
+            op: op.toOCI()
+        )
+    }
+}
+
+extension SeccompProfile.ArgCondition.Operator {
+    func toOCI() -> LinuxSeccompOperator {
+        switch self {
+        case .equalTo: return .opEqualTo
+        case .notEqual: return .opNotEqual
+        case .lessThan: return .opLessThan
+        case .lessEqual: return .opLessEqual
+        case .greaterThan: return .opGreaterThan
+        case .greaterEqual: return .opGreaterEqual
+        case .maskedEqual: return .opMaskedEqual
+        }
+    }
+}
diff --git a/Sources/ContainerizationSeccomp/SeccompCompiler+Aarch64.swift b/Sources/ContainerizationSeccomp/SeccompCompiler+Aarch64.swift
new file mode 100644
index 00000000..49cb7a1b
--- /dev/null
+++ b/Sources/ContainerizationSeccomp/SeccompCompiler+Aarch64.swift
@@ -0,0 +1,328 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+extension SeccompCompiler {
+    /// Linux aarch64 syscall table, mapping syscall names to their numbers.
+    ///
+    /// The compiler resolves the names listed in a seccomp rule through this
+    /// table; a name that is neither present here nor a decimal number is
+    /// skipped. Numbers follow the kernel's generic syscall numbering used by
+    /// arm64 (gaps such as 245-259 and 295-423 are unassigned on this
+    /// architecture).
+    ///
+    /// Syscall 79 is registered under both `fstatat` (the generic table name)
+    /// and `newfstatat` (the name the arm64 kernel, libseccomp and most
+    /// published profiles use), so profiles written either way resolve.
+    public static let aarch64SyscallTable: [String: UInt32] = [
+        "io_setup": 0,
+        "io_destroy": 1,
+        "io_submit": 2,
+        "io_cancel": 3,
+        "io_getevents": 4,
+        "setxattr": 5,
+        "lsetxattr": 6,
+        "fsetxattr": 7,
+        "getxattr": 8,
+        "lgetxattr": 9,
+        "fgetxattr": 10,
+        "listxattr": 11,
+        "llistxattr": 12,
+        "flistxattr": 13,
+        "removexattr": 14,
+        "lremovexattr": 15,
+        "fremovexattr": 16,
+        "getcwd": 17,
+        "lookup_dcookie": 18,
+        "eventfd2": 19,
+        "epoll_create1": 20,
+        "epoll_ctl": 21,
+        "epoll_pwait": 22,
+        "dup": 23,
+        "dup3": 24,
+        "fcntl": 25,
+        "inotify_init1": 26,
+        "inotify_add_watch": 27,
+        "inotify_rm_watch": 28,
+        "ioctl": 29,
+        "ioprio_set": 30,
+        "ioprio_get": 31,
+        "flock": 32,
+        "mknodat": 33,
+        "mkdirat": 34,
+        "unlinkat": 35,
+        "symlinkat": 36,
+        "linkat": 37,
+        "renameat": 38,
+        "umount2": 39,
+        "mount": 40,
+        "pivot_root": 41,
+        "nfsservctl": 42,
+        "statfs": 43,
+        "fstatfs": 44,
+        "truncate": 45,
+        "ftruncate": 46,
+        "fallocate": 47,
+        "faccessat": 48,
+        "chdir": 49,
+        "fchdir": 50,
+        "chroot": 51,
+        "fchmod": 52,
+        "fchmodat": 53,
+        "fchownat": 54,
+        "fchown": 55,
+        "openat": 56,
+        "close": 57,
+        "vhangup": 58,
+        "pipe2": 59,
+        "quotactl": 60,
+        "getdents64": 61,
+        "lseek": 62,
+        "read": 63,
+        "write": 64,
+        "readv": 65,
+        "writev": 66,
+        "pread64": 67,
+        "pwrite64": 68,
+        "preadv": 69,
+        "pwritev": 70,
+        "sendfile": 71,
+        "pselect6": 72,
+        "ppoll": 73,
+        "signalfd4": 74,
+        "vmsplice": 75,
+        "splice": 76,
+        "tee": 77,
+        "readlinkat": 78,
+        "fstatat": 79,
+        // Same syscall as "fstatat"; this is the name used on 64-bit arm.
+        "newfstatat": 79,
+        "fstat": 80,
+        "sync": 81,
+        "fsync": 82,
+        "fdatasync": 83,
+        "sync_file_range": 84,
+        "timerfd_create": 85,
+        "timerfd_settime": 86,
+        "timerfd_gettime": 87,
+        "utimensat": 88,
+        "acct": 89,
+        "capget": 90,
+        "capset": 91,
+        "personality": 92,
+        "exit": 93,
+        "exit_group": 94,
+        "waitid": 95,
+        "set_tid_address": 96,
+        "unshare": 97,
+        "futex": 98,
+        "set_robust_list": 99,
+        "get_robust_list": 100,
+        "nanosleep": 101,
+        "getitimer": 102,
+        "setitimer": 103,
+        "kexec_load": 104,
+        "init_module": 105,
+        "delete_module": 106,
+        "timer_create": 107,
+        "timer_gettime": 108,
+        "timer_getoverrun": 109,
+        "timer_settime": 110,
+        "timer_delete": 111,
+        "clock_settime": 112,
+        "clock_gettime": 113,
+        "clock_getres": 114,
+        "clock_nanosleep": 115,
+        "syslog": 116,
+        "ptrace": 117,
+        "sched_setparam": 118,
+        "sched_setscheduler": 119,
+        "sched_getscheduler": 120,
+        "sched_getparam": 121,
+        "sched_setaffinity": 122,
+        "sched_getaffinity": 123,
+        "sched_yield": 124,
+        "sched_get_priority_max": 125,
+        "sched_get_priority_min": 126,
+        "sched_rr_get_interval": 127,
+        "restart_syscall": 128,
+        "kill": 129,
+        "tkill": 130,
+        "tgkill": 131,
+        "sigaltstack": 132,
+        "rt_sigsuspend": 133,
+        "rt_sigaction": 134,
+        "rt_sigprocmask": 135,
+        "rt_sigpending": 136,
+        "rt_sigtimedwait": 137,
+        "rt_sigqueueinfo": 138,
+        "rt_sigreturn": 139,
+        "setpriority": 140,
+        "getpriority": 141,
+        "reboot": 142,
+        "setregid": 143,
+        "setgid": 144,
+        "setreuid": 145,
+        "setuid": 146,
+        "setresuid": 147,
+        "getresuid": 148,
+        "setresgid": 149,
+        "getresgid": 150,
+        "setfsuid": 151,
+        "setfsgid": 152,
+        "times": 153,
+        "setpgid": 154,
+        "getpgid": 155,
+        "getsid": 156,
+        "setsid": 157,
+        "getgroups": 158,
+        "setgroups": 159,
+        "uname": 160,
+        "sethostname": 161,
+        "setdomainname": 162,
+        "getrlimit": 163,
+        "setrlimit": 164,
+        "getrusage": 165,
+        "umask": 166,
+        "prctl": 167,
+        "getcpu": 168,
+        "gettimeofday": 169,
+        "settimeofday": 170,
+        "adjtimex": 171,
+        "getpid": 172,
+        "getppid": 173,
+        "getuid": 174,
+        "geteuid": 175,
+        "getgid": 176,
+        "getegid": 177,
+        "gettid": 178,
+        "sysinfo": 179,
+        "mq_open": 180,
+        "mq_unlink": 181,
+        "mq_timedsend": 182,
+        "mq_timedreceive": 183,
+        "mq_notify": 184,
+        "mq_getsetattr": 185,
+        "msgget": 186,
+        "msgctl": 187,
+        "msgrcv": 188,
+        "msgsnd": 189,
+        "semget": 190,
+        "semctl": 191,
+        "semtimedop": 192,
+        "semop": 193,
+        "shmget": 194,
+        "shmctl": 195,
+        "shmat": 196,
+        "shmdt": 197,
+        "socket": 198,
+        "socketpair": 199,
+        "bind": 200,
+        "listen": 201,
+        "accept": 202,
+        "connect": 203,
+        "getsockname": 204,
+        "getpeername": 205,
+        "sendto": 206,
+        "recvfrom": 207,
+        "setsockopt": 208,
+        "getsockopt": 209,
+        "shutdown": 210,
+        "sendmsg": 211,
+        "recvmsg": 212,
+        "readahead": 213,
+        "brk": 214,
+        "munmap": 215,
+        "mremap": 216,
+        "add_key": 217,
+        "request_key": 218,
+        "keyctl": 219,
+        "clone": 220,
+        "execve": 221,
+        "mmap": 222,
+        "fadvise64": 223,
+        "swapon": 224,
+        "swapoff": 225,
+        "mprotect": 226,
+        "msync": 227,
+        "mlock": 228,
+        "munlock": 229,
+        "mlockall": 230,
+        "munlockall": 231,
+        "mincore": 232,
+        "madvise": 233,
+        "remap_file_pages": 234,
+        "mbind": 235,
+        "get_mempolicy": 236,
+        "set_mempolicy": 237,
+        "migrate_pages": 238,
+        "move_pages": 239,
+        "rt_tgsigqueueinfo": 240,
+        "perf_event_open": 241,
+        "accept4": 242,
+        "recvmmsg": 243,
+        "arch_specific_syscall": 244,
+        "wait4": 260,
+        "prlimit64": 261,
+        "fanotify_init": 262,
+        "fanotify_mark": 263,
+        "name_to_handle_at": 264,
+        "open_by_handle_at": 265,
+        "clock_adjtime": 266,
+        "syncfs": 267,
+        "setns": 268,
+        "sendmmsg": 269,
+        "process_vm_readv": 270,
+        "process_vm_writev": 271,
+        "kcmp": 272,
+        "finit_module": 273,
+        "sched_setattr": 274,
+        "sched_getattr": 275,
+        "renameat2": 276,
+        "seccomp": 277,
+        "getrandom": 278,
+        "memfd_create": 279,
+        "bpf": 280,
+        "execveat": 281,
+        "userfaultfd": 282,
+        "membarrier": 283,
+        "mlock2": 284,
+        "copy_file_range": 285,
+        "preadv2": 286,
+        "pwritev2": 287,
+        "pkey_mprotect": 288,
+        "pkey_alloc": 289,
+        "pkey_free": 290,
+        "statx": 291,
+        "io_pgetevents": 292,
+        "rseq": 293,
+        "kexec_file_load": 294,
+        "pidfd_send_signal": 424,
+        "io_uring_setup": 425,
+        "io_uring_enter": 426,
+        "io_uring_register": 427,
+        "open_tree": 428,
+        "move_mount": 429,
+        "fsopen": 430,
+        "fsconfig": 431,
+        "fsmount": 432,
+        "fspick": 433,
+        "pidfd_open": 434,
+        "clone3": 435,
+        "close_range": 436,
+        "openat2": 437,
+        "pidfd_getfd": 438,
+        "faccessat2": 439,
+        "process_madvise": 440,
+        "epoll_pwait2": 441,
+        "mount_setattr": 442,
+        "quotactl_fd": 443,
+        "landlock_create_ruleset": 444,
+        "landlock_add_rule": 445,
+        "landlock_restrict_self": 446,
+        "memfd_secret": 447,
+        "process_mrelease": 448,
+        "futex_waitv": 449,
+        "set_mempolicy_home_node": 450,
+        // Syscalls added by more recent kernels.
+        "cachestat": 451,
+        "fchmodat2": 452,
+        "map_shadow_stack": 453,
+        "futex_wake": 454,
+        "futex_wait": 455,
+        "futex_requeue": 456,
+        "statmount": 457,
+        "listmount": 458,
+        "lsm_get_self_attr": 459,
+        "lsm_set_self_attr": 460,
+        "lsm_list_modules": 461,
+        "mseal": 462,
+    ]
+}
diff --git a/Sources/ContainerizationSeccomp/SeccompCompiler.swift b/Sources/ContainerizationSeccomp/SeccompCompiler.swift
new file mode 100644
index 00000000..fa5ab7ce
--- /dev/null
+++ b/Sources/ContainerizationSeccomp/SeccompCompiler.swift
@@ -0,0 +1,421 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+// https://man7.org/linux/man-pages/man2/seccomp.2.html
+
+import ContainerizationOCI
+
+/// One classic BPF instruction: a 16-bit opcode, two 8-bit relative jump
+/// offsets and a 32-bit immediate.
+public struct BPFInstruction: Equatable, Sendable {
+    /// Opcode (instruction class combined with size/mode/operation bits).
+    public var code: UInt16
+    /// Jump-true (`jt`) and jump-false (`jf`) offsets, relative to the next instruction.
+    public var jt, jf: UInt8
+    /// Immediate operand; its meaning depends on `code`.
+    public var k: UInt32
+
+    /// Creates an instruction from its four raw fields.
+    public init(code: UInt16, jt: UInt8, jf: UInt8, k: UInt32) {
+        self.k = k
+        self.jf = jf
+        self.jt = jt
+        self.code = code
+    }
+}
+
+/// Opcode building blocks for classic BPF instructions. A full opcode is
+/// formed by OR-ing one value from each relevant group.
+enum BPF {
+    /// Instruction class bits.
+    enum InstructionClass {
+        static let ld: UInt16 = 0x00
+        static let alu: UInt16 = 0x04
+        static let jmp: UInt16 = 0x05
+        static let ret: UInt16 = 0x06
+    }
+
+    /// Operand width for loads.
+    enum Size {
+        /// 32-bit word.
+        static let w: UInt16 = 0x00
+    }
+
+    /// Addressing mode for loads.
+    enum Mode {
+        /// Absolute offset into the input data.
+        static let abs: UInt16 = 0x20
+    }
+
+    /// Where the second operand comes from.
+    enum Source {
+        /// The instruction's immediate `k` field.
+        static let k: UInt16 = 0x00
+    }
+
+    /// Condition evaluated by a jump instruction.
+    enum JumpTest {
+        static let always: UInt16 = 0x00
+        static let eq: UInt16 = 0x10
+        static let gt: UInt16 = 0x20
+        static let ge: UInt16 = 0x30
+        static let set: UInt16 = 0x40
+    }
+
+    /// Arithmetic/logic operations.
+    enum ALUOp {
+        static let and: UInt16 = 0x50
+    }
+}
+
+/// Byte offsets of the fields the filter reads from the kernel's
+/// `seccomp_data` structure.
+enum SeccompData {
+    /// Offset of the syscall number.
+    static let nr: UInt32 = 0
+    /// Offset of the `AUDIT_ARCH_*` architecture value.
+    static let arch: UInt32 = 4
+    /// Offset of the first syscall argument. Each argument occupies 8 bytes,
+    /// read as two 32-bit halves: low word at the offset, high word at offset + 4.
+    static let args: UInt32 = 16
+}
+
+/// Action values a seccomp filter can hand back from a RET instruction,
+/// listed in ascending numeric order.
+enum SeccompReturn {
+    static let killThread: UInt32 = 0x0000_0000
+    static let trap: UInt32 = 0x0003_0000
+    /// Base value; the errno to report is OR-ed into the low 16 bits.
+    static let errno: UInt32 = 0x0005_0000
+    static let notify: UInt32 = 0x7FC0_0000
+    static let trace: UInt32 = 0x7FF0_0000
+    static let log: UInt32 = 0x7FFC_0000
+    static let allow: UInt32 = 0x7FFF_0000
+    static let killProcess: UInt32 = 0x8000_0000
+}
+
+/// Architecture identifiers as they appear in `seccomp_data.arch`.
+enum AuditArch {
+    /// Identifier the filter expects for 64-bit arm.
+    /// Composed of the 64-bit marker, the little-endian marker and the
+    /// machine number 0xB7.
+    static let aarch64: UInt32 = 0x8000_0000 | 0x4000_0000 | 0x0000_00B7
+}
+
+extension LinuxSeccompFlag {
+    /// The single-bit mask that represents this OCI flag in the flags word
+    /// passed to the kernel.
+    public var kernelFlag: UInt32 {
+        // Bit position of each flag within the mask.
+        let bitPosition: UInt32
+        switch self {
+        case .flagLog:
+            bitPosition = 1
+        case .flagSpecAllow:
+            bitPosition = 2
+        case .flagWaitKillableRecv:
+            bitPosition = 5
+        }
+        return 1 << bitPosition
+    }
+}
+
+public enum SeccompCompiler {
+    public enum Error: Swift.Error, CustomStringConvertible {
+        case unknownSyscall(String)
+        case invalidArgIndex(UInt)
+        /// A rule's argument checks need more instructions than an 8-bit
+        /// conditional jump offset can skip over.
+        case argBlockTooLarge(Int)
+
+        public var description: String {
+            switch self {
+            case .unknownSyscall(let name):
+                return "unknown syscall: \(name)"
+            case .invalidArgIndex(let idx):
+                return "invalid syscall arg index: \(idx), must be 0-5"
+            case .argBlockTooLarge(let count):
+                return "seccomp rule has too many argument conditions: \(count) instructions, maximum is \(UInt8.max)"
+            }
+        }
+    }
+
+    /// Errno used when an ERRNO action does not specify one (EPERM), as the
+    /// OCI runtime specification requires.
+    static let defaultErrnoRet: UInt = 1
+
+    /// Compiles an OCI `LinuxSeccomp` configuration into a classic BPF (cBPF)
+    /// filter program.
+    ///
+    /// The kernel evaluates the BPF program on every syscall. The program inspects
+    /// a read-only `seccomp_data` struct:
+    ///
+    ///     struct seccomp_data {
+    ///         int   nr;        // offset 0:  syscall number
+    ///         __u32 arch;      // offset 4:  AUDIT_ARCH_* value
+    ///         __u64 ip;        // offset 8:  instruction pointer (unused)
+    ///         __u64 args[6];   // offset 16: syscall arguments, 8 bytes each
+    ///     };
+    ///
+    /// Each BPF instruction is 8 bytes: a 16-bit opcode, two 8-bit jump offsets
+    /// (jt = jump-true, jf = jump-false, relative to the next instruction), and
+    /// a 32-bit immediate value (k). The program terminates when it executes a
+    /// RET instruction whose k value is the seccomp action (e.g. ALLOW, ERRNO).
+    ///
+    /// EXAMPLE: blocking `mkdirat` with ERRNO(EPERM), default ALLOW
+    ///
+    /// Given this OCI config:
+    ///
+    ///     defaultAction: SCMP_ACT_ALLOW
+    ///     syscalls: [{ names: ["mkdirat"], action: SCMP_ACT_ERRNO, errnoRet: 1 }]
+    ///
+    /// The compiler produces:
+    ///
+    ///     [0] LD_ABS  [seccomp_data.arch]       // load arch into accumulator
+    ///     [1] JEQ     AUDIT_ARCH_AARCH64  1, 0  // match → skip 1 to [3]; miss → fall to [2]
+    ///     [2] RET     KILL_PROCESS              // wrong arch → kill
+    ///     [3] LD_ABS  [seccomp_data.nr]         // load syscall number into accumulator
+    ///     [4] JEQ     34 (mkdirat)        0, 1  // match → fall to [5]; miss → skip 1 to [6]
+    ///     [5] RET     ERRNO|EPERM               // blocked syscall → return -EPERM
+    ///     [6] RET     ALLOW                     // no rule matched → allow
+    ///
+    /// Jump offsets (jt, jf) are relative to the *next* instruction. A JEQ with
+    /// jt=1,jf=0 means: if equal, skip 1 instruction forward; if not equal, fall
+    /// through to the very next instruction (skip 0).
+    ///
+    /// EXAMPLE: blocking multiple syscalls
+    ///
+    /// Each syscall in the rule list becomes a JEQ+RET pair. The syscall number
+    /// stays in the accumulator across rules, so no reload is needed:
+    ///
+    ///     syscalls: [
+    ///       { names: ["mkdirat"],  action: SCMP_ACT_ERRNO, errnoRet: 1 },
+    ///       { names: ["unlinkat"], action: SCMP_ACT_KILL_PROCESS },
+    ///     ]
+    ///
+    ///     [0] LD_ABS  [seccomp_data.arch]
+    ///     [1] JEQ     AUDIT_ARCH_AARCH64  1, 0
+    ///     [2] RET     KILL_PROCESS
+    ///     [3] LD_ABS  [seccomp_data.nr]
+    ///     [4] JEQ     34 (mkdirat)        0, 1  // miss → skip to [6]
+    ///     [5] RET     ERRNO|EPERM
+    ///     [6] JEQ     35 (unlinkat)       0, 1  // miss → skip to [8]
+    ///     [7] RET     KILL_PROCESS
+    ///     [8] RET     ALLOW
+    ///
+    /// For rules with argument filters, the compiler inserts additional LD/JEQ
+    /// sequences between the syscall number match and the action RET. Because BPF
+    /// is a 32-bit machine, 64-bit arguments are compared as two 32-bit halves
+    /// (lo at the base offset, hi at base+4, little-endian).
+    ///
+    /// Syscall names that are neither in `aarch64SyscallTable` nor a decimal
+    /// number are skipped rather than reported.
+    ///
+    /// - Parameter config: The OCI seccomp configuration to compile.
+    /// - Returns: The filter program, ending in a RET of the default action.
+    /// - Throws: `Error.invalidArgIndex` if an argument condition references an
+    ///   index above 5, or `Error.argBlockTooLarge` if a rule's argument checks
+    ///   cannot be spanned by an 8-bit jump offset.
+    public static func compileFromOCI(config: ContainerizationOCI.LinuxSeccomp) throws -> [BPFInstruction] {
+        var prog: [BPFInstruction] = []
+
+        // 1. Check architecture: load seccomp_data.arch
+        prog.append(BPFInstruction(code: BPF.InstructionClass.ld | BPF.Size.w | BPF.Mode.abs, jt: 0, jf: 0, k: SeccompData.arch))
+
+        // We only support AARCH64 today, so if arch != AARCH64 kill the process.
+        prog.append(BPFInstruction(code: BPF.InstructionClass.jmp | BPF.JumpTest.eq | BPF.Source.k, jt: 1, jf: 0, k: AuditArch.aarch64))
+        prog.append(BPFInstruction(code: BPF.InstructionClass.ret | BPF.Source.k, jt: 0, jf: 0, k: SeccompReturn.killProcess))
+
+        // 2. Load syscall number
+        prog.append(BPFInstruction(code: BPF.InstructionClass.ld | BPF.Size.w | BPF.Mode.abs, jt: 0, jf: 0, k: SeccompData.nr))
+
+        // 3. Per-rule matching
+        for syscall in config.syscalls {
+            let action = mapAction(syscall.action, errnoRet: syscall.errnoRet)
+
+            for name in syscall.names {
+                guard let nr = Self.aarch64SyscallTable[name] ?? UInt32(name) else {
+                    // Skip unknown syscall names that aren't valid numbers
+                    continue
+                }
+
+                if syscall.args.isEmpty {
+                    // Simple case: JEQ nr -> return action, else fall through
+                    // We need: JEQ nr, 0, 1 (if equal, execute next which is RET; else skip RET)
+                    prog.append(BPFInstruction(code: BPF.InstructionClass.jmp | BPF.JumpTest.eq | BPF.Source.k, jt: 0, jf: 1, k: nr))
+                    prog.append(BPFInstruction(code: BPF.InstructionClass.ret | BPF.Source.k, jt: 0, jf: 0, k: action))
+                } else {
+                    // With arg filters: JEQ nr -> arg checks -> return action
+                    // First, build the arg check instructions
+                    let argBlock = try buildArgBlock(args: syscall.args, action: action)
+                    // JEQ nr, 0, skip_arg_block. The skip distance must fit the
+                    // 8-bit jf field; report an error instead of trapping.
+                    guard let skipCount = UInt8(exactly: argBlock.count) else {
+                        throw Error.argBlockTooLarge(argBlock.count)
+                    }
+                    prog.append(BPFInstruction(code: BPF.InstructionClass.jmp | BPF.JumpTest.eq | BPF.Source.k, jt: 0, jf: skipCount, k: nr))
+                    prog.append(contentsOf: argBlock)
+                }
+            }
+        }
+
+        // 4. Default action
+        let defaultAction = mapAction(config.defaultAction, errnoRet: config.defaultErrnoRet)
+        prog.append(BPFInstruction(code: BPF.InstructionClass.ret | BPF.Source.k, jt: 0, jf: 0, k: defaultAction))
+
+        return prog
+    }
+
+    /// Map kernel flags from OCI config flags.
+    ///
+    /// - Parameter flags: The OCI flags to combine.
+    /// - Returns: The bitwise OR of every flag's `kernelFlag`; 0 for an empty list.
+    public static func mapFlags(_ flags: [LinuxSeccompFlag]) -> UInt32 {
+        flags.reduce(0) { $0 | $1.kernelFlag }
+    }
+
+    /// Maps an OCI action to the value a RET instruction must return.
+    ///
+    /// - Parameters:
+    ///   - action: The OCI seccomp action.
+    ///   - errnoRet: Errno for `.actErrno`. When nil, EPERM is used, which is
+    ///     the default the OCI runtime specification prescribes. Ignored for
+    ///     every other action.
+    /// - Returns: The seccomp return value, with the errno in the low 16 bits
+    ///   for `.actErrno`.
+    static func mapAction(_ action: LinuxSeccompAction, errnoRet: UInt?) -> UInt32 {
+        switch action {
+        case .actKill, .actKillThread:
+            return SeccompReturn.killThread
+        case .actKillProcess:
+            return SeccompReturn.killProcess
+        case .actTrap:
+            return SeccompReturn.trap
+        case .actErrno:
+            // Truncate rather than trap on values wider than 32 bits; only the
+            // low 16 bits are carried in the return value anyway.
+            let errno = UInt32(truncatingIfNeeded: errnoRet ?? defaultErrnoRet) & 0xFFFF
+            return SeccompReturn.errno | errno
+        case .actTrace:
+            return SeccompReturn.trace
+        case .actLog:
+            return SeccompReturn.log
+        case .actAllow:
+            return SeccompReturn.allow
+        case .actNotify:
+            return SeccompReturn.notify
+        }
+    }
+
+    /// Identifies which field of an instruction holds a jump offset that still
+    /// has to be patched.
+    enum JumpField { case jt, jf, k }
+
+    /// Build BPF instructions for argument comparison.
+    ///
+    /// Each argument is 64-bit but BPF operates on 32-bit values, so we compare
+    /// the low and high 32-bit halves separately. All arg conditions must match
+    /// (AND semantics) for the action to be taken.
+    ///
+    /// The returned block is laid out as: every check in order, the RET for
+    /// `action`, then a reload of the syscall number. Failed checks jump to the
+    /// reload so the next rule sees the syscall number in the accumulator.
+    ///
+    /// - Throws: `Error.invalidArgIndex` for an index above 5, or
+    ///   `Error.argBlockTooLarge` if the block exceeds 255 instructions.
+    static func buildArgBlock(args: [LinuxSeccompArg], action: UInt32) throws -> [BPFInstruction] {
+        // We build the arg checks. If any check fails, we need to jump past
+        // the remaining checks and the final RET action instruction.
+        // We'll build everything first, then fix up the failure jumps.
+
+        // Each failure jump records the instruction index and which field
+        // holds the jump offset. Most use jf (conditional false branch),
+        // but LT/LE use jt (the "true" branch of JGT/JGE is the fail path)
+        // and NE uses k (unconditional JA jump).
+        var flat: [BPFInstruction] = []
+        var failureJumps: [(index: Int, field: JumpField)] = []
+
+        for arg in args {
+            guard arg.index <= 5 else {
+                throw Error.invalidArgIndex(arg.index)
+            }
+
+            let check = try buildSingleArgCheck(arg: arg)
+            // Rebase the check's local failure-jump indices onto the flat block.
+            let baseIndex = flat.count
+            flat.append(contentsOf: check.instructions)
+            for (idx, field) in check.failureJumps {
+                failureJumps.append((baseIndex + idx, field))
+            }
+        }
+
+        // Append the success RET
+        flat.append(BPFInstruction(code: BPF.InstructionClass.ret | BPF.Source.k, jt: 0, jf: 0, k: action))
+
+        // The caller skips the whole block (including the reload appended
+        // below) with an 8-bit offset, and every failure jump inside is
+        // shorter than that, so one bound covers all conversions below.
+        let blockLength = flat.count + 1
+        guard blockLength <= Int(UInt8.max) else {
+            throw Error.argBlockTooLarge(blockLength)
+        }
+
+        // Fix up failure jumps to point past the RET.
+        // BPF jump offsets are relative to the next instruction.
+        // jump offset = target - (current + 1) = flat.count - idx - 1
+        for (idx, field) in failureJumps {
+            let jumpOffset = flat.count - idx - 1
+            switch field {
+            case .jt: flat[idx].jt = UInt8(jumpOffset)
+            case .jf: flat[idx].jf = UInt8(jumpOffset)
+            case .k: flat[idx].k = UInt32(jumpOffset)
+            }
+        }
+
+        // Reload the syscall number after arg checks for the next rule
+        flat.append(BPFInstruction(code: BPF.InstructionClass.ld | BPF.Size.w | BPF.Mode.abs, jt: 0, jf: 0, k: SeccompData.nr))
+
+        return flat
+    }
+
+    /// Build instructions for a single argument comparison.
+    /// Returns instructions and failure jumps (index + which field to patch).
+    ///
+    /// On success, control falls off the end of the returned instructions.
+    /// The failure jumps are emitted with a zero offset and must be patched by
+    /// the caller; success jumps are fully resolved here, relative to the end
+    /// of this check.
+    static func buildSingleArgCheck(arg: LinuxSeccompArg) throws -> (instructions: [BPFInstruction], failureJumps: [(index: Int, field: JumpField)]) {
+        let argOffset = SeccompData.args + UInt32(arg.index) * 8
+        let loOffset = argOffset  // low 32 bits
+        let hiOffset = argOffset + 4  // high 32 bits
+
+        let valueLo = UInt32(arg.value & 0xFFFF_FFFF)
+        let valueHi = UInt32(arg.value >> 32)
+        let valueTwoLo = UInt32(arg.valueTwo & 0xFFFF_FFFF)
+        let valueTwoHi = UInt32(arg.valueTwo >> 32)
+
+        let ldAbs = BPF.InstructionClass.ld | BPF.Size.w | BPF.Mode.abs
+        let jmpEq = BPF.InstructionClass.jmp | BPF.JumpTest.eq | BPF.Source.k
+        let jmpGt = BPF.InstructionClass.jmp | BPF.JumpTest.gt | BPF.Source.k
+        let jmpGe = BPF.InstructionClass.jmp | BPF.JumpTest.ge | BPF.Source.k
+        let jmpAlways = BPF.InstructionClass.jmp | BPF.JumpTest.always | BPF.Source.k
+        let aluAnd = BPF.InstructionClass.alu | BPF.ALUOp.and | BPF.Source.k
+
+        var insts: [BPFInstruction] = []
+        var fails: [(index: Int, field: JumpField)] = []
+
+        switch arg.op {
+        case .opEqualTo:
+            // EQ: both halves must match. Fail (jf) if either doesn't.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueHi))
+            fails.append((insts.count - 1, .jf))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueLo))
+            fails.append((insts.count - 1, .jf))
+
+        case .opNotEqual:
+            // NE: succeed if either half differs. Fail if both match.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            // hi differs → success (skip 3 past lo check)
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 3, k: valueHi))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            // lo differs → success (skip 1 past fail jump)
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 1, k: valueLo))
+            // Both matched → NE fails. JA uses k for the offset.
+            insts.append(BPFInstruction(code: jmpAlways, jt: 0, jf: 0, k: 0))
+            fails.append((insts.count - 1, .k))
+
+        case .opGreaterThan:
+            // GT: hi > v_hi → success; hi == v_hi → check lo > v_lo; else fail.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 3, jf: 0, k: valueHi))
+            // hi not greater; check if equal
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueHi))
+            fails.append((insts.count - 1, .jf))  // hi < v_hi → fail
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 0, jf: 0, k: valueLo))
+            fails.append((insts.count - 1, .jf))  // lo <= v_lo → fail
+
+        case .opGreaterEqual:
+            // GE: hi > v_hi → success; hi == v_hi → check lo >= v_lo; else fail.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 3, jf: 0, k: valueHi))
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueHi))
+            fails.append((insts.count - 1, .jf))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: jmpGe, jt: 0, jf: 0, k: valueLo))
+            fails.append((insts.count - 1, .jf))
+
+        case .opLessThan:
+            // LT: hi < v_hi → success; hi == v_hi → check lo < v_lo; else fail.
+            // JGT true means hi > v_hi → fail (on jt).
+            // JGT false + JEQ false means hi < v_hi → success (skip past lo check).
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 0, jf: 0, k: valueHi))
+            fails.append((insts.count - 1, .jt))  // hi > v_hi → fail
+            // hi < v_hi → skip the 2 remaining instructions (LD lo + JGE) so
+            // control lands exactly on the instruction after this check.
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 2, k: valueHi))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: jmpGe, jt: 0, jf: 0, k: valueLo))
+            fails.append((insts.count - 1, .jt))  // lo >= v_lo → fail
+
+        case .opLessEqual:
+            // LE: hi < v_hi → success; hi == v_hi → check lo <= v_lo; else fail.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 0, jf: 0, k: valueHi))
+            fails.append((insts.count - 1, .jt))  // hi > v_hi → fail
+            // hi < v_hi → skip the 2 remaining instructions (LD lo + JGT).
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 2, k: valueHi))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: jmpGt, jt: 0, jf: 0, k: valueLo))
+            fails.append((insts.count - 1, .jt))  // lo > v_lo → fail
+
+        case .opMaskedEqual:
+            // MASKED_EQ: (data & value) == valueTwo, checked per-half.
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: hiOffset))
+            insts.append(BPFInstruction(code: aluAnd, jt: 0, jf: 0, k: valueHi))
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueTwoHi))
+            fails.append((insts.count - 1, .jf))
+            insts.append(BPFInstruction(code: ldAbs, jt: 0, jf: 0, k: loOffset))
+            insts.append(BPFInstruction(code: aluAnd, jt: 0, jf: 0, k: valueLo))
+            insts.append(BPFInstruction(code: jmpEq, jt: 0, jf: 0, k: valueTwoLo))
+            fails.append((insts.count - 1, .jf))
+        }
+
+        return (insts, fails)
+    }
+}
diff --git a/Sources/Integration/ContainerTests.swift b/Sources/Integration/ContainerTests.swift
index 97f5866e..e8b0a763 100644
--- a/Sources/Integration/ContainerTests.swift
+++ b/Sources/Integration/ContainerTests.swift
@@ -4344,4 +4344,310 @@ extension IntegrationSuite {
             throw error
         }
     }
+
+    func testSeccompBlockSyscall() async throws {
+        let id = "test-seccomp-block-syscall"
+
+        let env = try await bootstrap(id)
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            // Default-allow profile whose single rule fails mkdirat with errno 1 (EPERM).
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: ["mkdirat"], action: .errno(1))]
+            config.process.arguments = ["mkdir", "/tmp/seccomp-test"]
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await container.create()
+        try await container.start()
+
+        let exitStatus = try await container.wait()
+        try await container.stop()
+
+        if exitStatus.exitCode == 0 {
+            throw IntegrationError.assert(msg: "expected non-zero exit code when mkdirat is blocked by seccomp")
+        }
+    }
+
+    func testSeccompBlockSyscallExplicitErrno() async throws {
+        let id = "test-seccomp-block-explicit-errno"
+
+        let env = try await bootstrap(id)
+        let stderrCapture = BufferWriter()
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            // Fail mkdirat with errno 100 (ENETDOWN) and capture stderr so the test can
+            // confirm that exact errno reached the process ("Network is down").
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: ["mkdirat"], action: .errno(100))]
+            config.process.arguments = ["sh", "-c", "mkdir /tmp/seccomp-test"]
+            config.process.noNewPrivileges = true
+            config.process.stderr = stderrCapture
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await container.create()
+        try await container.start()
+
+        let exitStatus = try await container.wait()
+        try await container.stop()
+
+        if exitStatus.exitCode == 0 {
+            throw IntegrationError.assert(msg: "expected non-zero exit code when mkdirat is blocked by seccomp")
+        }
+
+        let captured = String(data: stderrCapture.data, encoding: .utf8) ?? ""
+        if !captured.contains("Network is down") {
+            throw IntegrationError.assert(msg: "expected 'Network is down' (ENETDOWN) in stderr, got: \(captured)")
+        }
+    }
+
+    func testSeccompAllowList() async throws {
+        let id = "test-seccomp-allow-list"
+
+        let env = try await bootstrap(id)
+        let stdoutCapture = BufferWriter()
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            config.process.arguments = ["echo", "seccomp-ok"]
+            config.process.noNewPrivileges = true
+            config.process.stdout = stdoutCapture
+            config.bootLog = env.bootLog
+            config.seccomp = SeccompProfile(defaultAction: .allow)  // no rules: nothing is filtered
+        }
+
+        try await container.create()
+        try await container.start()
+
+        let exitStatus = try await container.wait()
+        try await container.stop()
+
+        if exitStatus.exitCode != 0 {
+            throw IntegrationError.assert(msg: "process status \(exitStatus) != 0")
+        }
+
+        guard let printed = String(data: stdoutCapture.data, encoding: .utf8) else {
+            throw IntegrationError.assert(msg: "failed to convert stdout to UTF8")
+        }
+
+        if !printed.contains("seccomp-ok") {
+            throw IntegrationError.assert(msg: "expected 'seccomp-ok' in output, got: \(printed)")
+        }
+    }
+
+    func testSeccompKillProcess() async throws {
+        let id = "test-seccomp-kill-process"
+
+        let env = try await bootstrap(id)
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            // mkdirat is answered with KILL_PROCESS, so the kernel delivers SIGSYS (31)
+            // and the runtime is expected to surface that as exit code 128 + 31 = 159.
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: ["mkdirat"], action: .killProcess)]
+            config.process.arguments = ["mkdir", "/tmp/seccomp-test"]
+            config.process.noNewPrivileges = true
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await container.create()
+        try await container.start()
+
+        let exitStatus = try await container.wait()
+        try await container.stop()
+
+        // Death by SIGSYS (signal 31) is reported as 128 + signal number.
+        let sigsysExitCode: Int32 = 128 + 31
+        if exitStatus.exitCode != sigsysExitCode {
+            throw IntegrationError.assert(msg: "expected exit code \(sigsysExitCode) (SIGSYS), got \(exitStatus.exitCode)")
+        }
+    }
+
+    func testSeccompExec() async throws {
+        let id = "test-seccomp-exec"
+
+        let env = try await bootstrap(id)
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: ["mkdirat"], action: .errno(1))]
+            config.process.arguments = ["sleep", "30"]  // keeps the container alive for the exec
+            config.process.noNewPrivileges = true
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await container.create()
+        try await container.start()
+
+        do {
+            let mkdirExec = try await container.exec("exec-seccomp") { execProcess in
+                execProcess.arguments = ["mkdir", "/tmp/seccomp-test"]
+            }
+            try await mkdirExec.start()
+            let mkdirStatus = try await mkdirExec.wait()
+            try await mkdirExec.delete()
+
+            if mkdirStatus.exitCode == 0 {
+                throw IntegrationError.assert(msg: "expected non-zero exit code for exec when mkdirat is blocked")
+            }
+
+            try await container.kill(SIGKILL)
+            try await container.wait()
+            try await container.stop()
+        } catch {
+            try? await container.stop()  // best-effort teardown before surfacing the failure
+            throw error
+        }
+    }
+
+    func testSeccompOCIDefaultCapabilityGatedRules() async throws {
+        let id = "test-seccomp-ocidefault-caps"
+
+        let env = try await bootstrap(id)
+
+        // Case 1 — default OCI capabilities (no CAP_SYS_ADMIN): the default profile does
+        // not allow mount, so seccomp answers EPERM and mount exits non-zero.
+        let denied = try LinuxContainer("\(id)-denied", rootfs: env.rootfs, vmm: env.vmm) { config in
+            config.process.arguments = ["mount", "-t", "tmpfs", "tmpfs", "/tmp"]
+            config.process.noNewPrivileges = true
+            config.process.capabilities = .defaultOCICapabilities
+            config.bootLog = env.bootLog
+            config.seccomp = .defaultProfile
+        }
+
+        try await denied.create()
+        try await denied.start()
+        let deniedStatus = try await denied.wait()
+        try await denied.stop()
+
+        if deniedStatus.exitCode == 0 {
+            throw IntegrationError.assert(msg: "expected mount to fail without CAP_SYS_ADMIN")
+        }
+
+        // Case 2 — CAP_SYS_ADMIN granted: the default profile adds mount to its allowlist
+        // and the process holds the capability the kernel requires for it.
+        let allowed = try LinuxContainer("\(id)-allowed", rootfs: env.rootfs, vmm: env.vmm) { config in
+            config.process.arguments = ["mount", "-t", "tmpfs", "tmpfs", "/tmp"]
+            config.process.noNewPrivileges = true
+            config.process.capabilities = LinuxCapabilities(capabilities: [.sysAdmin])
+            config.bootLog = env.bootLog
+            config.seccomp = .defaultProfile
+        }
+
+        try await allowed.create()
+        try await allowed.start()
+        let allowedStatus = try await allowed.wait()
+        try await allowed.stop()
+
+        if allowedStatus.exitCode != 0 {
+            throw IntegrationError.assert(msg: "expected mount to succeed with CAP_SYS_ADMIN, got exit code \(allowedStatus.exitCode)")
+        }
+    }
+
+    func testSeccompLogAction() async throws {
+        let id = "test-seccomp-log"
+
+        let env = try await bootstrap(id)
+        let dmesgCapture = BufferWriter()
+        let container = try LinuxContainer(id, rootfs: env.rootfs, vmm: env.vmm) { config in
+            // mkdirat gets the log action (SECCOMP_RET_LOG): the call still succeeds, but
+            // the kernel records an audit entry in its ring buffer. The init process just
+            // sleeps; mkdir and dmesg are run afterwards as execs.
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: ["mkdirat"], action: .log)]
+            config.process.arguments = ["sleep", "30"]
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await container.create()
+        try await container.start()
+
+        do {
+            // Step 1: issue the syscall that should be logged.
+            let mkdirExec = try await container.exec("exec-mkdir") { execProcess in
+                execProcess.arguments = ["mkdir", "/tmp/seccomp-log-test"]
+            }
+            try await mkdirExec.start()
+            let mkdirStatus = try await mkdirExec.wait()
+            try await mkdirExec.delete()
+
+            // The log action does not block the call, so mkdir must exit cleanly.
+            if mkdirStatus.exitCode != 0 {
+                throw IntegrationError.assert(msg: "expected mkdir to succeed with log action, got exit code \(mkdirStatus.exitCode)")
+            }
+
+            let dmesgExec = try await container.exec("exec-dmesg") { execProcess in
+                execProcess.arguments = ["dmesg"]
+                execProcess.capabilities = LinuxCapabilities(capabilities: [.syslog])
+                execProcess.stdout = dmesgCapture
+            }
+            try await dmesgExec.start()
+            let dmesgStatus = try await dmesgExec.wait()
+            try await dmesgExec.delete()
+
+            if dmesgStatus.exitCode != 0 {
+                throw IntegrationError.assert(msg: "dmesg failed with exit code \(dmesgStatus.exitCode)")
+            }
+
+            guard let kernelLog = String(data: dmesgCapture.data, encoding: .utf8) else {
+                throw IntegrationError.assert(msg: "failed to convert dmesg output to UTF8")
+            }
+
+            // AUDIT_SECCOMP records carry type=1326; mkdirat is syscall 34 on aarch64.
+            if !kernelLog.contains("type=1326") || !kernelLog.contains("syscall=34") {
+                throw IntegrationError.assert(msg: "expected seccomp audit log for mkdirat (type=1326 syscall=34), got: \(kernelLog.suffix(500))")
+            }
+
+            try await container.kill(SIGKILL)
+            try await container.wait()
+            try await container.stop()
+        } catch {
+            try? await container.stop()  // best-effort teardown before surfacing the failure
+            throw error
+        }
+    }
+
+    func testSeccompSyscallByNumber() async throws {
+        let id = "test-seccomp-syscall-number"
+
+        let env = try await bootstrap(id)
+
+        // First container: the rule identifies mkdirat by its name.
+        let byName = try LinuxContainer("\(id)-name", rootfs: env.rootfs, vmm: env.vmm) { config in
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: [.name("mkdirat")], action: .errno(1))]
+            config.process.arguments = ["mkdir", "/tmp/seccomp-test"]
+            config.process.noNewPrivileges = true
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await byName.create()
+        try await byName.start()
+        let byNameStatus = try await byName.wait()
+        try await byName.stop()
+
+        if byNameStatus.exitCode == 0 {
+            throw IntegrationError.assert(msg: "expected mkdir to fail when blocked by name")
+        }
+
+        // Second container: the same syscall, identified by its raw number (34).
+        let byNumber = try LinuxContainer("\(id)-number", rootfs: env.rootfs, vmm: env.vmm) { config in
+            var profile = SeccompProfile(defaultAction: .allow)
+            profile.syscalls = [SeccompProfile.Rule(syscalls: [.number(34)], action: .errno(1))]
+            config.process.arguments = ["mkdir", "/tmp/seccomp-test"]
+            config.process.noNewPrivileges = true
+            config.bootLog = env.bootLog
+            config.seccomp = profile
+        }
+
+        try await byNumber.create()
+        try await byNumber.start()
+        let byNumberStatus = try await byNumber.wait()
+        try await byNumber.stop()
+
+        if byNumberStatus.exitCode == 0 {
+            throw IntegrationError.assert(msg: "expected mkdir to fail when blocked by number (34)")
+        }
+    }
 }
diff --git a/Sources/Integration/Suite.swift b/Sources/Integration/Suite.swift
index 2f0a079d..5049ef31 100644
--- a/Sources/Integration/Suite.swift
+++ b/Sources/Integration/Suite.swift
@@ -376,6 +376,14 @@ struct IntegrationSuite: AsyncParsableCommand {
                 Test("container noNewPrivileges exec", testNoNewPrivilegesExec),
                 Test("container workingDir created", testWorkingDirCreated),
                 Test("container workingDir exec created", testWorkingDirExecCreated),
+                Test("container seccomp block syscall", testSeccompBlockSyscall),
+                Test("container seccomp block syscall explicit errno", testSeccompBlockSyscallExplicitErrno),
+                Test("container seccomp allow list", testSeccompAllowList),
+                Test("container seccomp kill process", testSeccompKillProcess),
+                Test("container seccomp exec", testSeccompExec),
+                Test("container seccomp defaultProfile capability gated rules", testSeccompOCIDefaultCapabilityGatedRules),
+                Test("container seccomp log action", testSeccompLogAction),
+                Test("container seccomp syscall by number", testSeccompSyscallByNumber),
 
                 // Pods
                 Test("pod single container", testPodSingleContainer),
diff --git a/Tests/ContainerizationSeccompTests/BPFSimulator.swift b/Tests/ContainerizationSeccompTests/BPFSimulator.swift
new file mode 100644
index 00000000..d67e654f
--- /dev/null
+++ b/Tests/ContainerizationSeccompTests/BPFSimulator.swift
@@ -0,0 +1,139 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+import ContainerizationSeccomp
+
+/// A classic BPF (cBPF) interpreter for seccomp filters.
+///
+/// Executes a compiled BPF program against a simulated `seccomp_data` struct,
+/// returning the seccomp return value (action + data). This allows testing
+/// filter semantics on any OS.
+enum BPFSimulator {
+    /// Simulates a seccomp BPF program against the given inputs.
+    ///
+    /// The simulated `seccomp_data` layout (little-endian):
+    /// - offset 0:  nr (syscall number, 32-bit)
+    /// - offset 4:  arch (AUDIT_ARCH_*, 32-bit)
+    /// - offset 8:  instruction_pointer (64-bit, unused)
+    /// - offset 16: args[0] (64-bit)
+    /// - offset 24: args[1] (64-bit)
+    /// - ... up to args[5] at offset 56
+    ///
+    /// Returns the 32-bit seccomp return value, or nil if the program is invalid,
+    /// uses an instruction this simulator does not model (such as BPF_X operands),
+    /// or runs past its last instruction without reaching a BPF_RET.
+    static func run(
+        _ program: [BPFInstruction],
+        syscallNr: UInt32,
+        arch: UInt32,
+        args: [UInt64] = []
+    ) -> UInt32? {
+        // Build seccomp_data as a byte buffer (64 bytes)
+        var data = [UInt8](repeating: 0, count: 64)
+
+        // nr at offset 0 (little-endian)
+        writeU32(&data, offset: 0, value: syscallNr)
+        // arch at offset 4
+        writeU32(&data, offset: 4, value: arch)
+        // instruction_pointer at offset 8 (leave as 0)
+        // args at offset 16, each 8 bytes (low word first); extra args beyond six are ignored
+        for (i, arg) in args.prefix(6).enumerated() {
+            let offset = 16 + i * 8
+            writeU32(&data, offset: offset, value: UInt32(arg & 0xFFFF_FFFF))
+            writeU32(&data, offset: offset + 4, value: UInt32(arg >> 32))
+        }
+
+        var accumulator: UInt32 = 0
+        var pc = 0
+
+        while pc < program.count {
+            let inst = program[pc]
+            let cls = inst.code & 0x07
+
+            switch cls {
+            case 0x00:  // BPF_LD — only BPF_W | BPF_ABS (size bits 0x00, mode bits 0x20)
+                guard (inst.code & 0xF8) == 0x20 else { return nil }
+                let offset = Int(inst.k)
+                // seccomp rejects loads that are misaligned or reach outside seccomp_data.
+                guard offset % 4 == 0, offset + 4 <= data.count else { return nil }
+                accumulator = readU32(data, offset: offset)
+                pc += 1
+
+            case 0x04:  // BPF_ALU
+                // There is no X register here, so a BPF_X operand is rejected, not read as 0.
+                guard (inst.code & 0x08) == 0 else { return nil }
+                let operand = inst.k
+                switch inst.code & 0xF0 {
+                case 0x00: accumulator &+= operand  // ADD
+                case 0x10: accumulator &-= operand  // SUB
+                case 0x20: accumulator &*= operand  // MUL
+                case 0x30:
+                    guard operand != 0 else { return nil }
+                    accumulator /= operand  // DIV
+                case 0x40: accumulator |= operand  // OR
+                case 0x50: accumulator &= operand  // AND
+                case 0x60: accumulator <<= operand  // LSH
+                case 0x70: accumulator >>= operand  // RSH
+                case 0x80: accumulator = 0 &- accumulator  // NEG: A = -A (two's complement), not ~A
+                default: return nil
+                }
+                pc += 1
+
+            case 0x05:  // BPF_JMP
+                let op = inst.code & 0xF0
+                // JA only uses k as its offset; conditional jumps must compare against K.
+                guard op == 0x00 || (inst.code & 0x08) == 0 else { return nil }
+                let operand = inst.k
+                switch op {
+                case 0x00:  // JA (unconditional)
+                    pc += 1 + Int(inst.k)
+                case 0x10:  // JEQ
+                    pc += 1 + Int(accumulator == operand ? inst.jt : inst.jf)
+                case 0x20:  // JGT
+                    pc += 1 + Int(accumulator > operand ? inst.jt : inst.jf)
+                case 0x30:  // JGE
+                    pc += 1 + Int(accumulator >= operand ? inst.jt : inst.jf)
+                case 0x40:  // JSET
+                    pc += 1 + Int((accumulator & operand) != 0 ? inst.jt : inst.jf)
+                default:
+                    return nil
+                }
+
+            case 0x06:  // BPF_RET — source bits 0x10 (BPF_A) return the accumulator, else K
+                return (inst.code & 0x18) == 0x10 ? accumulator : inst.k
+
+            default:
+                return nil
+            }
+        }
+
+        return nil
+    }
+
+    private static func writeU32(_ data: inout [UInt8], offset: Int, value: UInt32) {
+        data[offset] = UInt8(value & 0xFF)
+        data[offset + 1] = UInt8((value >> 8) & 0xFF)
+        data[offset + 2] = UInt8((value >> 16) & 0xFF)
+        data[offset + 3] = UInt8((value >> 24) & 0xFF)
+    }
+
+    private static func readU32(_ data: [UInt8], offset: Int) -> UInt32 {
+        UInt32(data[offset])
+            | (UInt32(data[offset + 1]) << 8)
+            | (UInt32(data[offset + 2]) << 16)
+            | (UInt32(data[offset + 3]) << 24)
+    }
+}
diff --git a/Tests/ContainerizationSeccompTests/SeccompCompilerTests.swift b/Tests/ContainerizationSeccompTests/SeccompCompilerTests.swift
new file mode 100644
index 00000000..102caa13
--- /dev/null
+++ b/Tests/ContainerizationSeccompTests/SeccompCompilerTests.swift
@@ -0,0 +1,369 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+import ContainerizationOCI
+import ContainerizationSeccomp
+import Testing
+
+private let aarch64: UInt32 = 0xC000_00B7  // AUDIT_ARCH_AARCH64
+private let retAllow: UInt32 = 0x7FFF_0000  // SECCOMP_RET_ALLOW
+private let retKillProcess: UInt32 = 0x8000_0000  // SECCOMP_RET_KILL_PROCESS
+
+private func retErrno(_ errno: UInt32) -> UInt32 { errno | 0x0005_0000 }  // SECCOMP_RET_ERRNO with errno in the low bits
+
+private func makeConfig(
+    defaultAction: LinuxSeccompAction = .actAllow,
+    defaultErrnoRet: UInt? = nil,
+    syscalls: [LinuxSyscall] = []
+) -> LinuxSeccomp {
+    return LinuxSeccomp(  // only the three parameters above vary between tests
+        defaultAction: defaultAction,
+        defaultErrnoRet: defaultErrnoRet,
+        architectures: [],
+        flags: [],
+        listenerPath: "",
+        listenerMetadata: "",
+        syscalls: syscalls
+    )
+}
+
+@Suite("SeccompCompiler")
+struct SeccompCompilerTests {
+    // MARK: - Architecture check
+
+    @Test("kills process on wrong architecture")
+    func wrongArchKills() throws {
+        let filter = try SeccompCompiler.compileFromOCI(config: makeConfig())
+        let verdict = BPFSimulator.run(filter, syscallNr: 0, arch: 0x1234_5678)
+        #expect(verdict == retKillProcess)
+    }
+
+    @Test("allows syscall on aarch64")
+    func correctArchAllows() throws {
+        let filter = try SeccompCompiler.compileFromOCI(config: makeConfig())
+        let verdict = BPFSimulator.run(filter, syscallNr: 56, arch: aarch64)  // 56 = openat
+        #expect(verdict == retAllow)
+    }
+
+    // MARK: - Default action
+
+    @Test("returns default allow when no rules match")
+    func defaultAllow() throws {
+        let filter = try SeccompCompiler.compileFromOCI(config: makeConfig(defaultAction: .actAllow))
+        let verdict = BPFSimulator.run(filter, syscallNr: 56, arch: aarch64)
+        #expect(verdict == retAllow)
+    }
+
+    @Test("returns default errno when no rules match")
+    func defaultErrno() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1
+            ))
+        let verdict = BPFSimulator.run(filter, syscallNr: 56, arch: aarch64)
+        #expect(verdict == retErrno(1))
+    }
+
+    // MARK: - Simple syscall blocking
+
+    @Test("blocks specific syscall with errno, allows others")
+    func blockSyscallErrno() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["mkdirat"], action: .actErrno, errnoRet: 1, args: [])
+                ]
+            ))
+
+        // 34 = mkdirat: the rule applies, ERRNO with errno 1
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retErrno(1))
+        // 56 = openat: no rule, falls through to the default ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)
+        // 63 = read: no rule, falls through to the default ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64) == retAllow)
+    }
+
+    @Test("blocks specific syscall with kill process")
+    func blockSyscallKill() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["mkdirat"], action: .actKillProcess, errnoRet: nil, args: [])
+                ]
+            ))
+
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retKillProcess)
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)
+    }
+
+    @Test("errno passes through explicit errno value")
+    func errnoExplicitValue() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["mkdirat"], action: .actErrno, errnoRet: 100, args: [])
+                ]
+            ))
+
+        // errno 100 is ENETDOWN
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retErrno(100))
+    }
+
+    // MARK: - Multiple rules and syscalls
+
+    @Test("multiple rules apply independently")
+    func multipleRules() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["mkdirat"], action: .actErrno, errnoRet: 1, args: []),
+                    LinuxSyscall(names: ["unlinkat"], action: .actKillProcess, errnoRet: nil, args: []),
+                ]
+            ))
+
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retErrno(1))  // 34 = mkdirat
+        #expect(BPFSimulator.run(filter, syscallNr: 35, arch: aarch64) == retKillProcess)  // 35 = unlinkat
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)  // 56 = openat
+    }
+
+    @Test("multiple names in one rule all match")
+    func multipleNamesOneRule() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["mkdirat", "unlinkat", "symlinkat"], action: .actErrno, errnoRet: 1, args: [])
+                ]
+            ))
+
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retErrno(1))  // 34 = mkdirat
+        #expect(BPFSimulator.run(filter, syscallNr: 35, arch: aarch64) == retErrno(1))  // 35 = unlinkat
+        #expect(BPFSimulator.run(filter, syscallNr: 36, arch: aarch64) == retErrno(1))  // 36 = symlinkat
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)  // 56 = openat
+    }
+
+    // MARK: - Unknown syscall names
+
+    @Test("unknown syscall names are silently skipped")
+    func unknownSyscallSkipped() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["open"], action: .actErrno, errnoRet: 1, args: [])
+                ]
+            ))
+
+        // aarch64 has no "open" syscall, so the rule produces nothing and the default applies
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)
+    }
+
+    @Test("mixed known/unknown names: known names still match")
+    func mixedKnownUnknown() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                syscalls: [
+                    LinuxSyscall(names: ["open", "mkdirat"], action: .actErrno, errnoRet: 1, args: [])
+                ]
+            ))
+
+        #expect(BPFSimulator.run(filter, syscallNr: 34, arch: aarch64) == retErrno(1))  // mkdirat still matches
+        #expect(BPFSimulator.run(filter, syscallNr: 56, arch: aarch64) == retAllow)  // openat is untouched
+    }
+
+    // MARK: - Argument filtering
+
+    @Test("equalTo arg filter matches exact value")
+    func argEqualTo() throws {
+        // Only personality(0) is allowed; every other call hits the default errno
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["personality"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 0, value: 0, valueTwo: 0, op: .opEqualTo)]
+                    )
+                ]
+            ))
+
+        // arg0 == 0: rule matches, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 92, arch: aarch64, args: [0]) == retAllow)
+        // arg0 == 8: rule does not match, default ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 92, arch: aarch64, args: [8]) == retErrno(1))
+        // arg0 == 0xFFFFFFFF: rule does not match, default ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 92, arch: aarch64, args: [0xFFFF_FFFF]) == retErrno(1))
+    }
+
+    @Test("notEqual arg filter blocks exact value")
+    func argNotEqual() throws {
+        // socket() is allowed for every arg0 except 40 (AF_VSOCK)
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["socket"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 0, value: 40, valueTwo: 0, op: .opNotEqual)]
+                    )
+                ]
+            ))
+
+        // arg0 = 2 (AF_INET) differs from 40: ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 198, arch: aarch64, args: [2]) == retAllow)
+        // arg0 = 40 (AF_VSOCK): the not-equal condition fails, default ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 198, arch: aarch64, args: [40]) == retErrno(1))
+        // arg0 = 1 (AF_UNIX) differs from 40: ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 198, arch: aarch64, args: [1]) == retAllow)
+    }
+
+    @Test("maskedEqual arg filter checks flag mask")
+    func argMaskedEqual() throws {
+        // clone is allowed only while (flags & 0x7E020000) == 0
+        let flagMask: UInt64 = 0x7E02_0000  // 2_114_060_288
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["clone"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 0, value: flagMask, valueTwo: 0, op: .opMaskedEqual)]
+                    )
+                ]
+            ))
+
+        // flags = SIGCHLD (17): no masked bit set, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 220, arch: aarch64, args: [17]) == retAllow)
+        // flags = CLONE_NEWUSER (0x10000000): a masked namespace bit is set, ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 220, arch: aarch64, args: [0x1000_0000]) == retErrno(1))
+        // flags = CLONE_NEWPID (0x20000000): a masked namespace bit is set, ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 220, arch: aarch64, args: [0x2000_0000]) == retErrno(1))
+        // flags = SIGCHLD | CLONE_THREAD (0x10000): CLONE_THREAD is outside the mask, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 220, arch: aarch64, args: [UInt64(17 | 0x10000)]) == retAllow)
+    }
+
+    @Test("greaterThan arg filter")
+    func argGreaterThan() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["read"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 2, value: 0, valueTwo: 0, op: .opGreaterThan)]
+                    )
+                ]
+            ))
+
+        // arg2 (count) = 1: 1 > 0 holds, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0, 0, 1]) == retAllow)
+        // arg2 (count) = 0: 0 > 0 does not hold, default ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0, 0, 0]) == retErrno(1))
+    }
+
+    @Test("lessEqual arg filter")
+    func argLessEqual() throws {
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["read"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 2, value: 4096, valueTwo: 0, op: .opLessEqual)]
+                    )
+                ]
+            ))
+
+        // arg2 = 4096: boundary value, 4096 <= 4096 holds, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0, 0, 4096]) == retAllow)
+        // arg2 = 4097: just above the bound, default ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0, 0, 4097]) == retErrno(1))
+        // arg2 = 0: 0 <= 4096 holds, ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0, 0, 0]) == retAllow)
+    }
+
+    // MARK: - 64-bit argument handling
+
+    @Test("handles 64-bit argument values correctly")
+    func arg64Bit() throws {
+        let wideValue: UInt64 = 0x1_0000_0001  // non-zero in both the high and the low 32-bit half
+        let filter = try SeccompCompiler.compileFromOCI(
+            config: makeConfig(
+                defaultAction: .actErrno,
+                defaultErrnoRet: 1,
+                syscalls: [
+                    LinuxSyscall(
+                        names: ["read"],
+                        action: .actAllow,
+                        errnoRet: nil,
+                        args: [LinuxSeccompArg(index: 0, value: wideValue, valueTwo: 0, op: .opEqualTo)]
+                    )
+                ]
+            ))
+
+        // Both halves equal: ALLOW
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [wideValue]) == retAllow)
+        // Only the low half equal (high half is 0): ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [1]) == retErrno(1))
+        // Only the high half equal (low half is 0): ERRNO
+        #expect(BPFSimulator.run(filter, syscallNr: 63, arch: aarch64, args: [0x1_0000_0000]) == retErrno(1))
+    }
+
+    // MARK: - Error handling
+
+    @Test("rejects arg index > 5")
+    func invalidArgIndex() {
+        let badConfig = makeConfig(
+            syscalls: [
+                LinuxSyscall(
+                    names: ["read"],
+                    action: .actErrno,
+                    errnoRet: 1,
+                    args: [LinuxSeccompArg(index: 6, value: 0, valueTwo: 0, op: .opEqualTo)]
+                )
+            ]
+        )
+
+        #expect(throws: SeccompCompiler.Error.self) {
+            try SeccompCompiler.compileFromOCI(config: badConfig)
+        }
+    }
+
+    // MARK: - Flag mapping
+
+    @Test("maps seccomp flags correctly")
+    func flagMapping() {
+        #expect(SeccompCompiler.mapFlags([]) == 0)
+        #expect(SeccompCompiler.mapFlags([.flagLog]) == 2)
+        #expect(SeccompCompiler.mapFlags([.flagSpecAllow]) == 4)
+        #expect(SeccompCompiler.mapFlags([.flagLog, .flagSpecAllow]) == 6)  // flags combine bitwise
+        #expect(SeccompCompiler.mapFlags([.flagWaitKillableRecv]) == 32)
+    }
+}
diff --git a/vminitd/Package.swift b/vminitd/Package.swift
index 11039778..fcb050a4 100644
--- a/vminitd/Package.swift
+++ b/vminitd/Package.swift
@@ -68,6 +68,7 @@ let package = Package(
                 .product(name: "SystemPackage", package: "swift-system"),
                 .product(name: "Containerization", package: "containerization"),
                 .product(name: "ContainerizationOS", package: "containerization"),
+                .product(name: "ContainerizationSeccomp", package: "containerization"),
                 "LCShim",
                 "Cgroup",
             ]
diff --git a/vminitd/Sources/LCShim/include/syscall.h b/vminitd/Sources/LCShim/include/syscall.h
index f30e3ab2..dfb7095b 100644
--- a/vminitd/Sources/LCShim/include/syscall.h
+++ b/vminitd/Sources/LCShim/include/syscall.h
@@ -37,4 +37,18 @@ int CZ_pidfd_getfd(int pidfd, int targetfd, unsigned int flags);
 
 int CZ_prctl_set_no_new_privs();
 
+struct CZ_sock_filter {  // one cBPF instruction; same field layout as the kernel's struct sock_filter
+  unsigned short code;  // opcode
+  unsigned char jt;     // jump offset when the condition is true
+  unsigned char jf;     // jump offset when the condition is false
+  unsigned int k;       // generic operand (immediate, offset, ...)
+};
+
+struct CZ_sock_fprog {  // a whole cBPF program; same field layout as the kernel's struct sock_fprog
+  unsigned short len;             // number of instructions in `filter`
+  struct CZ_sock_filter *filter;  // pointer to the first instruction
+};
+
+int CZ_seccomp_set_mode_filter(unsigned int flags, struct CZ_sock_fprog *prog);  // seccomp(2) with SECCOMP_SET_MODE_FILTER
+
 #endif
diff --git a/vminitd/Sources/LCShim/syscall.c b/vminitd/Sources/LCShim/syscall.c
index 4070196c..6af98738 100644
--- a/vminitd/Sources/LCShim/syscall.c
+++ b/vminitd/Sources/LCShim/syscall.c
@@ -39,3 +39,25 @@ int CZ_pidfd_getfd(int pidfd, int targetfd, unsigned int flags) {
 int CZ_prctl_set_no_new_privs() {
   return prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 }
+
+// Fallback for libc headers that do not define SYS_seccomp. Syscall numbers
+// are per-architecture, so only supply a value where it is known to be right.
+#ifndef SYS_seccomp
+#if defined(__aarch64__) || defined(__riscv)
+#define SYS_seccomp 277
+#elif defined(__x86_64__) && !defined(__ILP32__)
+#define SYS_seccomp 317
+#else
+#error "SYS_seccomp is not defined for this architecture"
+#endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+// Thin wrapper over seccomp(SECCOMP_SET_MODE_FILTER, flags, prog). Returns the
+// raw syscall result: 0 when the filter is installed, -1 with errno set on error.
+int CZ_seccomp_set_mode_filter(unsigned int flags, struct CZ_sock_fprog *prog) {
+  return syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, flags, prog);
+}
diff --git a/vminitd/Sources/vmexec/ExecCommand.swift b/vminitd/Sources/vmexec/ExecCommand.swift
index a3f0dd23..e3b40972 100644
--- a/vminitd/Sources/vmexec/ExecCommand.swift
+++ b/vminitd/Sources/vmexec/ExecCommand.swift
@@ -35,6 +35,9 @@ struct ExecCommand: ParsableCommand {
     @Option(name: .long, help: "pid of the init process for the container")
     var parentPid: Int
 
+    @Option(name: .long, help: "path to the OCI bundle for the container")
+    var bundlePath: String?
+
     func run() throws {
         do {
             let src = URL(fileURLWithPath: processPath)
@@ -43,7 +46,15 @@ struct ExecCommand: ParsableCommand {
                 ContainerizationOCI.Process.self,
                 from: processBytes
             )
-            try execInNamespaces(process: process)
+
+            var seccomp: ContainerizationOCI.LinuxSeccomp?
+            if let bundlePath {
+                let bundle = try ContainerizationOCI.Bundle.load(path: URL(filePath: bundlePath))
+                let spec = try bundle.loadConfig()
+                seccomp = spec.linux?.seccomp
+            }
+
+            try execInNamespaces(process: process, seccomp: seccomp)
         } catch {
             App.writeError(error)
             throw error
@@ -56,7 +67,7 @@ struct ExecCommand: ParsableCommand {
         }
     }
 
-    private func execInNamespaces(process: ContainerizationOCI.Process) throws {
+    private func execInNamespaces(process: ContainerizationOCI.Process, seccomp: ContainerizationOCI.LinuxSeccomp?) throws {
         let syncPipe = FileDescriptor(rawValue: 3)
         let ackPipe = FileDescriptor(rawValue: 4)
 
@@ -142,13 +153,28 @@ struct ExecCommand: ParsableCommand {
             // Set uid, gid, and supplementary groups
             try App.setPermissions(user: process.user)
 
+            // Resolve the executable path before seccomp is applied.
+            let resolvedExecutable = try App.resolveExecutable(process: process, currentEnv: process.env)
+
+            // Without noNewPrivileges, seccomp is a privileged operation that
+            // requires CAP_SYS_ADMIN. Install it before dropping capabilities.
+            // With noNewPrivileges, install it as late as possible (right before
+            // exec) to minimize the syscalls that need to be in the profile.
+            if let seccomp, !process.noNewPrivileges {
+                try App.setSeccomp(seccomp: seccomp)
+            }
+
             // Finish capabilities (after user change)
             try App.finishCapabilities(preparedCaps)
 
             // Set no_new_privs if requested by the OCI spec.
             try App.setNoNewPrivileges(process: process)
 
-            try App.exec(process: process, currentEnv: process.env)
+            if let seccomp, process.noNewPrivileges {
+                try App.setSeccomp(seccomp: seccomp)
+            }
+
+            try App.exec(process: process, resolvedExecutable: resolvedExecutable)
         } else {  // parent process
             // Send our child's pid to our parent before we exit.
             var childPid = processID
diff --git a/vminitd/Sources/vmexec/RunCommand.swift b/vminitd/Sources/vmexec/RunCommand.swift
index ba070340..5951f657 100644
--- a/vminitd/Sources/vmexec/RunCommand.swift
+++ b/vminitd/Sources/vmexec/RunCommand.swift
@@ -193,14 +193,29 @@ struct RunCommand: ParsableCommand {
         // Set uid, gid, and supplementary groups.
         try App.setPermissions(user: process.user)
 
+        // Resolve the executable path before seccomp is applied.
+        let resolvedExecutable = try App.resolveExecutable(process: process, currentEnv: process.env)
+
+        // Without noNewPrivileges, seccomp is a privileged operation that
+        // requires CAP_SYS_ADMIN. Install it before dropping capabilities.
+        // With noNewPrivileges, install it as late as possible (right before
+        // exec) to minimize the syscalls that need to be in the profile.
+        if let seccomp = spec.linux?.seccomp, !process.noNewPrivileges {
+            try App.setSeccomp(seccomp: seccomp)
+        }
+
         // Finish capabilities (after user change)
         try App.finishCapabilities(preparedCaps)
 
         // Set no_new_privs if requested by the OCI spec.
         try App.setNoNewPrivileges(process: process)
 
+        if let seccomp = spec.linux?.seccomp, process.noNewPrivileges {
+            try App.setSeccomp(seccomp: seccomp)
+        }
+
         // Finally execve the container process.
-        try App.exec(process: process, currentEnv: process.env)
+        try App.exec(process: process, resolvedExecutable: resolvedExecutable)
     }
 
     private func setupNamespaces(namespaces: [ContainerizationOCI.LinuxNamespace]?) throws -> Int32 {
diff --git a/vminitd/Sources/vmexec/vmexec.swift b/vminitd/Sources/vmexec/vmexec.swift
index b1778c37..593f133d 100644
--- a/vminitd/Sources/vmexec/vmexec.swift
+++ b/vminitd/Sources/vmexec/vmexec.swift
@@ -23,6 +23,7 @@ import ArgumentParser
 import ContainerizationError
 import ContainerizationOCI
 import ContainerizationOS
+import ContainerizationSeccomp
 import FoundationEssentials
 import LCShim
 import Logging
@@ -64,32 +65,34 @@ extension App {
         }
     }
 
-    static func exec(process: ContainerizationOCI.Process, currentEnv: [String]? = nil) throws {
+    static func resolveExecutable(process: ContainerizationOCI.Process, currentEnv: [String]? = nil) throws -> URL {
         guard !process.args.isEmpty else {
             throw App.Errno(stage: "exec", info: "process args cannot be empty")
         }
 
         let executableArg = process.args[0]
-        let resolvedExecutable: URL
 
-        if executableArg.contains("/") {
-            if executableArg.hasPrefix("/") {
-                resolvedExecutable = URL(fileURLWithPath: executableArg)
-            } else {
-                resolvedExecutable = URL(fileURLWithPath: process.cwd).appendingPathComponent(executableArg).standardized
-            }
-
-            guard FileManager.default.fileExists(atPath: resolvedExecutable.path) else {
-                throw App.Failure(message: "failed to find target executable \(executableArg)")
-            }
-        } else {
+        guard executableArg.contains("/") else {  // no slash: resolve the bare name via Path.lookPath
             let path = Path.findPath(currentEnv) ?? Path.getCurrentPath()
             guard let found = Path.lookPath(executableArg, path: path) else {
                 throw App.Failure(message: "failed to find target executable \(executableArg)")
             }
-            resolvedExecutable = found
+            return found
         }
+        let resolved: URL
+        if executableArg.hasPrefix("/") {  // absolute path: used as given
+            resolved = URL(fileURLWithPath: executableArg)
+        } else {  // relative path containing a slash: resolved against process.cwd
+            resolved = URL(fileURLWithPath: process.cwd).appendingPathComponent(executableArg).standardized
+        }
+
+        guard FileManager.default.fileExists(atPath: resolved.path) else {  // existence check only
+            throw App.Failure(message: "failed to find target executable \(executableArg)")
+        }
+        return resolved
+    }
 
+    static func exec(process: ContainerizationOCI.Process, resolvedExecutable: URL) throws {
         let executable = strdup(resolvedExecutable.path)
         var argv = process.args.map { strdup($0) }
         argv += [nil]
@@ -250,6 +253,46 @@ extension App {
         }
     }
 
+    /// Compiles an OCI seccomp configuration to cBPF and installs it with the
+    /// `seccomp(SECCOMP_SET_MODE_FILTER)` syscall.
+    ///
+    /// Returns without installing anything when the compiled program is empty.
+    ///
+    /// - Parameter seccomp: The seccomp section of the OCI runtime spec.
+    /// - Throws: Whatever `SeccompCompiler.compileFromOCI` throws, an
+    ///   `App.Failure` when the program is longer than the kernel accepts, or
+    ///   an errno-based error when the syscall fails.
+    static func setSeccomp(seccomp: ContainerizationOCI.LinuxSeccomp) throws {
+        let bpfInstructions = try SeccompCompiler.compileFromOCI(config: seccomp)
+        guard !bpfInstructions.isEmpty else { return }
+
+        // The kernel rejects seccomp programs longer than BPF_MAXINSNS (4096).
+        // Checking up front gives a clear error and keeps the UInt16 conversion
+        // below from trapping on an oversized program.
+        let maxInstructions = 4096
+        guard bpfInstructions.count <= maxInstructions else {
+            throw App.Failure(
+                message: "seccomp filter has \(bpfInstructions.count) instructions, limit is \(maxInstructions)")
+        }
+
+        let flags = SeccompCompiler.mapFlags(seccomp.flags)
+
+        let filters = bpfInstructions.map { inst in
+            CZ_sock_filter(code: inst.code, jt: inst.jt, jf: inst.jf, k: inst.k)
+        }
+
+        try filters.withUnsafeBufferPointer { buffer in
+            var prog = CZ_sock_fprog(
+                len: UInt16(buffer.count),
+                // Non-nil: `filters` is non-empty per the guard above.
+                filter: UnsafeMutablePointer(mutating: buffer.baseAddress!)
+            )
+            guard CZ_seccomp_set_mode_filter(flags, &prog) == 0 else {
+                throw App.Errno(stage: "seccomp(SET_MODE_FILTER)", info: "failed to apply seccomp filter")
+            }
+        }
+    }
+
     static func Errno(stage: String, info: String = "") -> ContainerizationError {
         let posix = POSIXError(.init(rawValue: errno)!, userInfo: ["stage": stage])
         return ContainerizationError(.internalError, message: "\(info) \(String(describing: posix))")
diff --git a/vminitd/Sources/vminitd/ManagedProcess.swift b/vminitd/Sources/vminitd/ManagedProcess.swift
index 4ace4045..7a409f80 100644
--- a/vminitd/Sources/vminitd/ManagedProcess.swift
+++ b/vminitd/Sources/vminitd/ManagedProcess.swift
@@ -103,6 +103,8 @@ final class ManagedProcess: ContainerProcess, Sendable {
                 "\(owningPid)",
                 "--process-path",
                 bundle.getExecSpecPath(id: id).path,
+                "--bundle-path",
+                bundle.path.path,
             ]
         } else {
             args = ["run", "--bundle-path", bundle.path.path]