diff --git a/rootfs.py b/rootfs.py
index f4a6d226..cda68cf7 100755
--- a/rootfs.py
+++ b/rootfs.py
@@ -31,8 +31,9 @@ def create_configuration_file(args):
     config_path = os.path.join('sysa', 'bootstrap.cfg')
     with open(config_path, "w", encoding="utf_8") as config:
         config.write(f"FORCE_TIMESTAMPS={args.force_timestamps}\n")
-        config.write(f"CHROOT={args.chroot or args.bwrap}\n")
+        config.write(f"CHROOT={args.chroot or args.wrap or args.bwrap}\n")
         config.write(f"CHROOT_ONLY_SYSA={args.bwrap}\n")
+        config.write(f"CHROOT_WRAP={args.wrap}\n")
         config.write(f"UPDATE_CHECKSUMS={args.update_checksums}\n")
         config.write(f"JOBS={args.cores}\n")
         config.write(f"INTERNAL_CI={args.internal_ci}\n")
@@ -59,6 +60,8 @@ def main():
                         action="store_true")
     parser.add_argument("-bw", "--bwrap", help="Run inside a bwrap sandbox",
                         action="store_true")
+    parser.add_argument("-w", "--wrap", help="Use builtin unprivileged wrapper",
+                        action="store_true")
     parser.add_argument("-p", "--preserve", help="Do not remove temporary dir",
                         action="store_true")
     parser.add_argument("-t", "--tmpdir", help="Temporary directory",
@@ -113,6 +116,8 @@ def check_types():
             count += 1
         if args.bwrap:
             count += 1
+        if args.wrap:
+            count += 1
         if args.bare_metal:
             count += 1
         return count
@@ -131,6 +136,9 @@ def check_types():
     if args.bwrap and args.tmpfs:
         raise ValueError("tmpfs cannot be used with bwrap.")
 
+    if args.wrap and args.tmpfs:
+        raise ValueError("tmpfs cannot be used with wrap.")
+
     # Cores validation
     if int(args.cores) < 1:
         raise ValueError("Must use one or more cores.")
@@ -224,6 +232,15 @@ def bootstrap(args, system_a, system_c, tmpdir):
                          '--tmpfs', '/tmp',
                          '/init')
 
+    elif args.wrap:
+        system_c.prepare(create_disk_image=False)
+        system_a.prepare(create_initramfs=False, wrap=True)
+
+        arch = stage0_arch_map.get(args.arch, args.arch)
+        init = os.path.join('bootstrap-seeds', 'POSIX', arch, 'kaem-optional-seed')
+
+        run(init, cwd=system_a.tmp_dir)
+
     elif args.bare_metal:
         if args.kernel:
             system_c.prepare(create_disk_image=True)
diff --git a/sysa.py b/sysa.py
index d474109a..5cb9214d 100755
--- a/sysa.py
+++ b/sysa.py
@@ -37,7 +37,7 @@ def __init__(self, tmpdir, arch, external_sources,
 
         self.tmp_dir = tmpdir.add_sys("sysa")
 
-    def prepare(self, create_initramfs, kernel_bootstrap=False):
+    def prepare(self, create_initramfs, kernel_bootstrap=False, wrap=False):
         """
         Prepare directory structure for System A.
         We create an empty tmp directory, unpack stage0-posix.
@@ -50,7 +50,7 @@ def prepare(self, create_initramfs, kernel_bootstrap=False):
             shutil.copy2(os.path.join(self.sys_dir, 'base-preseeded.kaem'),
                          os.path.join(self.tmp_dir, 'kaem.x86'))
         else:
-            self.stage0_posix()
+            self.stage0_posix(wrap)
 
         self.sysa()
 
@@ -93,7 +93,7 @@ def sysc(self, create_initramfs):
         shutil.copytree(self.sysc_dir, os.path.join(self.tmp_dir, 'sysc'),
                         ignore=ignore)
 
-    def stage0_posix(self):
+    def stage0_posix(self, wrap=False):
         """Copy in all of the stage0-posix"""
         stage0_posix_base_dir = os.path.join(self.sys_dir, 'stage0-posix', 'src')
         copy_tree(stage0_posix_base_dir, self.tmp_dir)
@@ -104,7 +104,11 @@ def stage0_posix(self):
         shutil.copy2(kaem_optional_seed, os.path.join(self.tmp_dir, 'init'))
 
         # stage0-posix hook to continue running live-bootstrap
-        shutil.copy2(os.path.join(self.sys_dir, 'after.kaem'),
+        if wrap:
+            after_kaem_name = "after_wrap.kaem"
+        else:
+            after_kaem_name = "after.kaem"
+        shutil.copy2(os.path.join(self.sys_dir, after_kaem_name),
                      os.path.join(self.tmp_dir, 'after.kaem'))
 
     def add_fiwix_files(self, file_list_path, dirpath):
diff --git a/sysa/after_wrap.kaem b/sysa/after_wrap.kaem
new file mode 100644
index 00000000..361447d0
--- /dev/null
+++ b/sysa/after_wrap.kaem
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# SPDX-FileCopyrightText: 2023 Max Hearnden
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+set -ex
+
+PATH=./${ARCH_DIR}/bin
+
+# Build the wrapper from sysa/wrap.c with M2-Mesoplanet
+./${ARCH_DIR}/bin/M2-Mesoplanet --architecture ${ARCH} \
+    -f sysa/wrap.c \
+    -o ./${ARCH_DIR}/bin/wrap \
+    --temp-directory ./${ARCH_DIR}/artifact
+
+# Continue with the regular hook (sysa/after.kaem), run under the wrapper
+exec ./${ARCH_DIR}/bin/wrap ./${ARCH_DIR}/bin/kaem --verbose --strict --file sysa/after.kaem
diff --git a/sysa/run2.sh b/sysa/run2.sh
index 6d21eca7..dbe4c9d5 100755
--- a/sysa/run2.sh
+++ b/sysa/run2.sh
@@ -114,6 +114,15 @@ else
     SYSC=/sysc_image
     sys_transfer "${SYSC}" /sysc gzip patch
     if [ "${CHROOT_ONLY_SYSA}" != True ]; then
+        if [ "${CHROOT_WRAP}" = True ]; then
+            # bind mount dev, proc and sys into new root
+            mkdir -p "${SYSC}/dev"
+            mount --no-mtab --rbind /dev "${SYSC}/dev"
+            mkdir -p "${SYSC}/proc"
+            mount --no-mtab --rbind /proc "${SYSC}/proc"
+            mkdir -p "${SYSC}/sys"
+            mount --no-mtab --rbind /sys "${SYSC}/sys"
+        fi
         exec chroot "${SYSC}" /init
     fi
 fi
diff --git a/sysa/wrap.c b/sysa/wrap.c
new file mode 100644
index 00000000..c16dde17
--- /dev/null
+++ b/sysa/wrap.c
@@ -0,0 +1,471 @@
+/* SPDX-FileCopyrightText: 2023 Max Hearnden */
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+
+/*
+ * wrap: run a command chrooted into the current directory. When not
+ * already root, it first enters new user and mount namespaces in which the
+ * caller is mapped to root, so no privileges are required.
+ */
+
+#define CLONE_NEWUSER 0x10000000
+#define CLONE_NEWNS 0x00020000
+#define MS_BIND 4096
+#define MS_REC 16384
+#define MNT_DETACH 0x00000002
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * M2-Planet builds get hand-written syscall stubs below, one set per
+ * architecture; hosted builds use the C library's implementations.
+ */
+#ifdef __M2__
+
+#include <bootstrappable.h>
+
+#if __i386__
+/* Linux i386 syscalls: unshare=310, geteuid32=201, getegid32=202, mount=21, chroot=61 */
+
+int unshare(int flags) {
+    asm (
+        "lea_ebx,[esp+DWORD] %4"
+        "mov_ebx,[ebx]"
+        "mov_eax, %310"
+        "int !0x80"
+    );
+}
+
+int geteuid() {
+    asm (
+        "mov_eax, %201"
+        "int !0x80"
+    );
+}
+
+int getegid() {
+    asm (
+        "mov_eax, %202"
+        "int !0x80"
+    );
+}
+
+int mount(
+    char *source, char *target, char *filesystemtype,
+    unsigned mountflags, void *data
+) {
+    asm (
+        "DEFINE mov_esi,[esp+DWORD] 8BB424"
+        "DEFINE mov_edi,[esp+DWORD] 8BBC24"
+        "lea_ebx,[esp+DWORD] %20"
+        "mov_ebx,[ebx]"
+        "lea_ecx,[esp+DWORD] %16"
+        "mov_ecx,[ecx]"
+        "lea_edx,[esp+DWORD] %12"
+        "mov_edx,[edx]"
+        "mov_esi,[esp+DWORD] %8 ; mov esi, [esp+8]"
+        "mov_edi,[esp+DWORD] %4 ; mov edi, [esp+4]"
+        "mov_eax, %21"
+        "int !0x80"
+    );
+}
+
+int chroot(char *path) {
+    asm (
+        "lea_ebx,[esp+DWORD] %4"
+        "mov_ebx,[ebx]"
+        "mov_eax, %61"
+        "int !0x80"
+    );
+}
+
+#elif __x86_64__
+/* Linux x86_64 syscalls: unshare=272, geteuid=107, getegid=108, mount=165, chroot=161 */
+
+int unshare(int flags) {
+    asm (
+        "lea_rdi,[rsp+DWORD] %8"
+        "mov_rdi,[rdi]"
+        "mov_rax, %272"
+        "syscall"
+    );
+}
+
+int geteuid() {
+    asm (
+        "mov_rax, %107"
+        "syscall"
+    );
+}
+
+int getegid() {
+    asm (
+        "mov_rax, %108"
+        "syscall"
+    );
+}
+
+int mount(
+    char *source, char *target, char *filesystemtype,
+    unsigned mountflags, void *data
+) {
+    asm (
+        "DEFINE mov_r8,[rsp+DWORD] 4C8B8424"
+        "DEFINE mov_r10,[rsp+DWORD] 4C8B9424"
+        "lea_rdi,[rsp+DWORD] %40"
+        "mov_rdi,[rdi]"
+        "lea_rsi,[rsp+DWORD] %32"
+        "mov_rsi,[rsi]"
+        "lea_rdx,[rsp+DWORD] %24"
+        "mov_rdx,[rdx]"
+        "mov_r10,[rsp+DWORD] %16"
+        "mov_r8,[rsp+DWORD] %8"
+        "mov_rax, %165"
+        "syscall"
+    );
+}
+
+int chroot(char *path) {
+    asm (
+        "lea_rdi,[rsp+DWORD] %8"
+        "mov_rdi,[rdi]"
+        "mov_rax, %161"
+        "syscall"
+    );
+}
+
+#elif __riscv && __riscv_xlen==32
+/* Linux riscv32 syscalls: unshare=97, geteuid=175, getegid=177, mount=40, chroot=51 */
+
+int unshare(int flags) {
+    asm (
+        "rd_a0 rs1_fp !-4 lw"
+        "rd_a7 !97 addi"
+        "ecall"
+    );
+}
+
+int geteuid() {
+    asm (
+        "rd_a7 !175 addi"
+        "ecall"
+    );
+}
+
+int getegid() {
+    asm (
+        "rd_a7 !177 addi"
+        "ecall"
+    );
+}
+
+int mount (
+    char *source, char *target, char *filesystemtype,
+    unsigned mountflags, void *data
+) {
+    asm (
+        "rd_a0 rs1_fp !-4 lw"
+        "rd_a1 rs1_fp !-8 lw"
+        "rd_a2 rs1_fp !-12 lw"
+        "rd_a3 rs1_fp !-16 lw"
+        "rd_a4 rs1_fp !-20 lw"
+        "rd_a7 !40 addi"
+        "ecall"
+    );
+}
+
+int chroot(char *path) {
+    asm (
+        "rd_a0 rs1_fp !-4 lw"
+        "rd_a7 !51 addi"
+        "ecall"
+    );
+}
+
+#elif __riscv && __riscv_xlen==64
+/* Linux riscv64 syscalls: unshare=97, geteuid=175, getegid=177, mount=40, chroot=51 */
+
+int unshare(int flags) {
+    asm (
+        "rd_a0 rs1_fp !-8 ld"
+        "rd_a7 !97 addi"
+        "ecall"
+    );
+}
+
+int geteuid() {
+    asm (
+        "rd_a7 !175 addi"
+        "ecall"
+    );
+}
+
+int getegid() {
+    asm (
+        "rd_a7 !177 addi"
+        "ecall"
+    );
+}
+
+int mount (
+    char *source, char *target, char *filesystemtype,
+    unsigned mountflags, void *data
+) {
+    asm (
+        "rd_a0 rs1_fp !-8 ld"
+        "rd_a1 rs1_fp !-16 ld"
+        "rd_a2 rs1_fp !-24 ld"
+        "rd_a3 rs1_fp !-32 ld"
+        "rd_a4 rs1_fp !-40 ld"
+        "rd_a7 !40 addi"
+        "ecall"
+    );
+}
+
+int chroot(char *path) {
+    asm (
+        "rd_a0 rs1_fp !-8 ld"
+        "rd_a7 !51 addi"
+        "ecall"
+    );
+}
+
+#else
+
+#error arch not supported
+
+#endif
+
+#else
+
+#include <errno.h>
+
+extern int unshare(int flags);
+
+extern int mount(const char *source, const char *target,
+    const char *filesystemtype, unsigned long mountflags, const void *data);
+
+#endif
+
+/* Create path as an empty file if it does not exist yet; exit on failure. */
+void touch(char *path) {
+    int fd = open(path, O_CREAT, 0777);
+    if (fd == -1) {
+        fputs("Failed to create file ", stderr);
+        fputs(path, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+    if (close(fd) != 0) {
+        fputs("Failed to close file ", stderr);
+        fputs(path, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*
+ * Create directory path, treating "already exists" as success.
+ * Returns 0 on success and non-zero on any other failure.
+ */
+int mkdir_exist_ok(char *path) {
+    int r = mkdir(path, 0755);
+#ifdef __M2__
+    /* M2 build: keep the check against the raw kernel value -17 (-EEXIST) */
+    if (r == -17) {
+        return 0;
+    }
+#else
+    /* A hosted libc returns -1 and reports EEXIST through errno */
+    if (r != 0 && errno == EEXIST) {
+        return 0;
+    }
+#endif
+    return r;
+}
+
+/*
+ * Create the mountpoint target (a directory when type is non-zero, an
+ * empty file otherwise) and mount source on it; exit on failure.
+ */
+void mkmount(
+    char *source, char *target, char *filesystemtype,
+    unsigned mountflags, void *data, int type
+) {
+    int r = 0;
+    if (type) {
+        r = mkdir_exist_ok(target);
+    } else {
+        touch(target);
+    }
+    if (r != 0) {
+        fputs("Failed to create mountpoint ", stderr);
+        fputs(target, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+
+    r = mount(source, target, filesystemtype, mountflags, data);
+
+    if (r != 0) {
+        fputs("Failed to mount directory ", stderr);
+        fputs(target, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+}
+
+/* Write the single mapping "0 <parent_id> 1" to the id map file at path. */
+void set_map(int parent_id, char *path) {
+    int fd = open(path, O_WRONLY, 0);
+    if (fd == -1) {
+        fputs("Failed to open map file ", stderr);
+        fputs(path, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+
+    char *map_contents = calloc(38, sizeof(char));
+    if (map_contents == NULL) {
+        fputs("Failed to allocate space for map contents\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+
+#ifdef __M2__
+    strcpy(map_contents, "0 ");
+    char *parent_id_str = int2str(parent_id, 10, 0);
+    strcat(map_contents, parent_id_str);
+    strcat(map_contents, " 1");
+#else
+    snprintf(map_contents, 38, "0 %i 1", parent_id);
+#endif
+    int map_length = strlen(map_contents);
+    if (write(fd, map_contents, map_length) != map_length) {
+        fputs("Failed to write map file ", stderr);
+        fputs(path, stderr);
+        fputc('\n', stderr);
+        exit(EXIT_FAILURE);
+    }
+    write(STDOUT_FILENO, map_contents, map_length);
+    free(map_contents);
+    close(fd);
+}
+
+/* Write "deny" to /proc/self/setgroups; exit on failure. */
+void deny_setgroups() {
+    int fd = open("/proc/self/setgroups", O_WRONLY, 0777);
+    if(fd == -1) {
+        fputs("Failed to open /proc/self/setgroups\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    if (write(fd, "deny", 4) != 4) {
+        fputs("Failed to write /proc/self/setgroups\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    close(fd);
+}
+
+/*
+ * Usage: wrap command [args...]
+ * Unless the current directory is already /, bind-mount device nodes, /proc
+ * and /sys into it, add tmpfs mounts for dev/shm and tmp, chroot into it and
+ * then exec command with only ARCH and ARCH_DIR kept in the environment.
+ */
+int main(int argc, char **argv) {
+    if(argc <= 1) {
+        fputs("Expected at least one argument: command\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    char *cwd = get_current_dir_name();
+    if (cwd == NULL) {
+        fputs("Failed to determine current directory\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    /* Do nothing if cwd is already root */
+    if (strcmp(cwd, "/")) {
+        int uid = geteuid();
+        int gid = getegid();
+        /* Don't create a user and mount namespace if we are already root */
+        if (uid != 0) {
+            if (unshare(CLONE_NEWUSER | CLONE_NEWNS) != 0) {
+                fputs("Failed to create user and mount namespaces\n", stderr);
+                exit(EXIT_FAILURE);
+            }
+            /* Prevent the use of setgroups and make gid_map writeable */
+            deny_setgroups();
+            /* Map the root user in the user namespace to our user id */
+            set_map(uid, "/proc/self/uid_map");
+            /* Map the root group in the user namespace to our group id */
+            set_map(gid, "/proc/self/gid_map");
+        }
+        int r = mkdir_exist_ok("dev");
+        if (r != 0) {
+            fputs("Failed to create dev folder\n", stderr);
+            exit(EXIT_FAILURE);
+        }
+        mkmount ("/dev/null", "dev/null", "", MS_BIND, NULL, 0);
+        mkmount ("/dev/zero", "dev/zero", "", MS_BIND, NULL, 0);
+        mkmount ("/dev/random", "dev/random", "", MS_BIND, NULL, 0);
+        mkmount ("/dev/urandom", "dev/urandom", "", MS_BIND, NULL, 0);
+        mkmount ("/dev/ptmx", "dev/ptmx", "", MS_BIND, NULL, 0);
+        mkmount ("/dev/tty", "dev/tty", "", MS_BIND, NULL, 0);
+        mkmount ("tmpfs", "dev/shm", "tmpfs", 0, NULL, 1);
+        mkmount ("/proc", "proc", "", MS_BIND | MS_REC, NULL, 1);
+        mkmount ("/sys", "sys", "", MS_BIND | MS_REC, NULL, 1);
+        mkmount ("tmpfs", "tmp", "tmpfs", 0, NULL, 1);
+        if (chroot (".") != 0) {
+            fputs("Failed to chroot into .\n", stderr);
+            exit(EXIT_FAILURE);
+        }
+    }
+    free(cwd);
+
+
+    char **newenv = malloc(3 * sizeof(char *));
+    int newenv_index = 0;
+    if (newenv == NULL) {
+        fputs("Failed to allocate space for new environment\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+
+    char *ARCH = getenv("ARCH");
+    if (ARCH != NULL) {
+        newenv[0] = malloc(6 + strlen(ARCH));
+        if (newenv[0] == NULL) {
+            fputs("Failed to allocate space for new environment\n", stderr);
+            exit(EXIT_FAILURE);
+        }
+        strcpy(newenv[0], "ARCH=");
+        strcpy(newenv[0] + 5, ARCH);
+        newenv_index += 1;
+    }
+
+    char *ARCH_DIR = getenv("ARCH_DIR");
+    if (ARCH_DIR != NULL) {
+        newenv[newenv_index] = malloc(10 + strlen(ARCH_DIR));
+        if (newenv[newenv_index] == NULL) {
+            fputs("Failed to allocate space for new environment\n", stderr);
+            exit(EXIT_FAILURE);
+        }
+        strcpy(newenv[newenv_index], "ARCH_DIR=");
+        strcpy(newenv[newenv_index] + 9, ARCH_DIR);
+        newenv_index += 1;
+    }
+
+    newenv[newenv_index] = NULL;
+
+
+#ifdef __M2__
+    /* NOTE(review): presumably M2-Planet does not scale pointer arithmetic, hence the byte offset - confirm */
+    return execve (argv[1], argv + sizeof(char *), newenv);
+#else
+    return execve (argv[1], argv + 1, newenv);
+#endif
+}