From cb8c283ea302545300fe1429636cebd2382d5d9a Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 8 May 2026 09:22:55 -0400 Subject: [PATCH] bootloader: Fix a few things related to systemd-boot installs See: https://github.com/osbuild/image-builder-cli/issues/506 Right now most of our testing for composefs installs uses bcvk which uses `to-disk` in an ephemeral VM. That masks some issues, like the fact that `bootctl install` by default touches the ESP variables. Wire things up with `--generic-image` so it behaves the same as the bootupd backend. Now, the next problem is that osbuild actually makes `/` readonly (which is great!) and in fact we should change our recommended `podman run` invocations to pass `--read-only`. But that reveals the next bug, which is that bootctl installs wants to write an `/etc/kernel/entry-token` which we basically don't use. Redirect those writes to a tempdir for now, though we should probably try to get upstream patches for that. Also set SYSTEMD_RELAX_ESP_CHECKS=1 so bootctl skips the partition-type GUID check for the ESP - we don't want it to e.g. try to look up things in the udev database, which might not be available. Assisted-by: OpenCode (claude-sonnet-4-6) Signed-off-by: Colin Walters --- crates/lib/src/bootc_composefs/boot.rs | 140 ++++++++++++++++++++++--- crates/lib/src/bootloader.rs | 105 +++++++++++-------- crates/mount/src/tempmount.rs | 42 ++++++++ 3 files changed, 232 insertions(+), 55 deletions(-) diff --git a/crates/lib/src/bootc_composefs/boot.rs b/crates/lib/src/bootc_composefs/boot.rs index f64b2af5e..81a6dfa21 100644 --- a/crates/lib/src/bootc_composefs/boot.rs +++ b/crates/lib/src/bootc_composefs/boot.rs @@ -195,10 +195,31 @@ fi ) } -/// Mount the ESP from the provided device +/// Mount flags shared by all ESP mounts: non-executable, no setuid. +const ESP_MOUNT_FLAGS: MountFlags = + MountFlags::from_bits_retain(MountFlags::NOEXEC.bits() | MountFlags::NOSUID.bits()); + +/// FAT mount options: owner-only permissions on files (0600) and dirs (0700). +const ESP_MOUNT_DATA: &std::ffi::CStr = c"fmask=0177,dmask=0077"; + +/// Mount the ESP from the provided device into a temporary directory. pub fn mount_esp(device: &str) -> Result { - let flags = MountFlags::NOEXEC | MountFlags::NOSUID; - TempMount::mount_dev(device, "vfat", flags, Some(c"fmask=0177,dmask=0077")) + TempMount::mount_dev(device, "vfat", ESP_MOUNT_FLAGS, Some(ESP_MOUNT_DATA)) +} + +/// Mount the ESP from `device` at the given path and return a guard that +/// synchronously unmounts (and flushes) it on drop. +pub(crate) fn mount_esp_at( + device: &str, + path: std::path::PathBuf, +) -> Result { + bootc_mount::tempmount::MountGuard::mount( + device, + path, + "vfat", + ESP_MOUNT_FLAGS, + Some(ESP_MOUNT_DATA), + ) } /// Filename release field for primary (new/upgraded) entry. @@ -1145,6 +1166,100 @@ pub(crate) fn setup_composefs_uki_boot( Ok(boot_digest) } +/// A composefs image attached to a temporary directory with the ESP and a +/// tmpfs mounted inside it, ready for bootloader installation. +/// +/// The composefs image (a detached `fsmount(2)` fd with no VFS path) is +/// attached to a tmpdir via `move_mount(2)`, giving us a real filesystem path +/// that `mount(2)` and bootctl can use. The ESP is mounted at +/// `/efi` (if that directory exists in the image) or `/boot`, +/// per the Boot Loader Specification. A tmpfs is also mounted at +/// `/tmp` to provide a writable scratch area for tools invoked with +/// `--root`. +/// +/// Drop order matters: the ESP and tmpfs guards are declared before `composefs` +/// so they are unmounted (and flushed) before the composefs root is detached. +pub(crate) struct MountedImageRoot { + // Unmounted before `composefs` on drop; ESP before tmp (inner before outer). + _esp: bootc_mount::tempmount::MountGuard, + _tmp: bootc_mount::tempmount::MountGuard, + composefs: TempMount, + pub(crate) esp_subdir: &'static str, +} + +impl MountedImageRoot { + /// Find the ESP on `device`, attach the composefs image to a tmpdir, and + /// mount the ESP and a scratch tmpfs inside it. + // TODO: install to all ESPs on multi-device setups + #[context("Preparing image root for bootloader installation")] + pub(crate) fn new( + composefs_mnt_fd: std::os::fd::OwnedFd, + device: &bootc_blockdev::Device, + ) -> Result { + let roots = device.find_all_roots()?; + let mut esp_part = None; + for root in &roots { + if let Some(esp) = root.find_partition_of_esp_optional()? { + esp_part = Some(esp); + break; + } + } + let esp_part = esp_part.ok_or_else(|| anyhow!("ESP partition not found"))?; + + // Attach the detached composefs fsmount fd to a real tmpdir path so + // that mount(2) and bootctl --root can work with it. + let composefs = TempMount::mount_fd(composefs_mnt_fd) + .context("Attaching composefs image to temporary directory")?; + + // TODO: support XBOOTLDR. Per BLS, the ESP should be mounted at /efi + // when a separate XBOOTLDR partition is present at /boot. bootc does + // not yet detect or use XBOOTLDR in the composefs install path, so + // unconditionally mount the ESP at /boot for now. + let esp_subdir = "boot"; + + let esp_path = composefs.dir.path().join(esp_subdir); + let esp = + mount_esp_at(&esp_part.path(), esp_path).context("Mounting ESP into composefs root")?; + + // Mount a tmpfs over /tmp so that tools invoked with --root have a + // writable scratch area without touching the read-only EROFS root. + let tmp_path = composefs.dir.path().join("tmp"); + let tmp = bootc_mount::tempmount::MountGuard::mount( + "tmpfs", + tmp_path, + "tmpfs", + MountFlags::NOEXEC | MountFlags::NOSUID | MountFlags::NODEV, + None::<&std::ffi::CStr>, + ) + .context("Mounting tmpfs into composefs root")?; + + Ok(Self { + _esp: esp, + _tmp: tmp, + composefs, + esp_subdir, + }) + } + + /// The composefs image as a capability-safe directory (for file reads). + pub(crate) fn dir(&self) -> &Dir { + &self.composefs.fd + } + + /// Real filesystem path of the composefs tmpdir root. + pub(crate) fn root_path(&self) -> &std::path::Path { + self.composefs.dir.path() + } + + /// Open the mounted ESP as a capability-safe directory. + pub(crate) fn open_esp_dir(&self) -> Result { + self.composefs + .fd + .open_dir(self.esp_subdir) + .with_context(|| format!("Opening ESP at /{}", self.esp_subdir)) + } +} + pub struct SecurebootKeys { pub dir: Dir, pub keys: Vec, @@ -1225,13 +1340,12 @@ pub(crate) async fn setup_composefs_boot( let entries = get_boot_resources(&fs, &*repo).context("Extracting boot entries from OCI image")?; - let mounted_fs = Dir::reopen_dir( - &repo - .mount(&id.to_hex()) - .context("Failed to mount composefs image")?, - )?; + let composefs_mnt_fd = repo + .mount(&id.to_hex()) + .context("Failed to mount composefs image")?; + let mounted_root = MountedImageRoot::new(composefs_mnt_fd, &root_setup.device_info)?; - let postfetch = PostFetchState::new(state, &mounted_fs)?; + let postfetch = PostFetchState::new(state, mounted_root.dir())?; let boot_uuid = root_setup .get_boot_uuid()? @@ -1253,11 +1367,9 @@ pub(crate) async fn setup_composefs_boot( )?; } else { crate::bootloader::install_systemd_boot( - &root_setup.device_info, - &root_setup.physical_root_path, + &mounted_root, &state.config_opts, - None, - get_secureboot_keys(&mounted_fs, BOOTC_AUTOENROLL_PATH)?, + get_secureboot_keys(mounted_root.dir(), BOOTC_AUTOENROLL_PATH)?, )?; } @@ -1280,7 +1392,7 @@ pub(crate) async fn setup_composefs_boot( repo, &id, entry, - &mounted_fs, + mounted_root.dir(), )?, BootType::Uki => setup_composefs_uki_boot( BootSetupType::Setup((&root_setup, &state, &postfetch)), diff --git a/crates/lib/src/bootloader.rs b/crates/lib/src/bootloader.rs index 97b456cd7..0a40fc5c2 100644 --- a/crates/lib/src/bootloader.rs +++ b/crates/lib/src/bootloader.rs @@ -10,7 +10,7 @@ use fn_error_context::context; use bootc_mount as mount; -use crate::bootc_composefs::boot::{SecurebootKeys, mount_esp}; +use crate::bootc_composefs::boot::{MountedImageRoot, SecurebootKeys}; use crate::utils; /// The name of the mountpoint for efi (as a subdirectory of /boot, or at the toplevel) @@ -23,6 +23,16 @@ const BOOTUPD_UPDATES: &str = "usr/lib/bootupd/updates"; // from: https://github.com/systemd/systemd/blob/26b2085d54ebbfca8637362eafcb4a8e3faf832f/man/systemd-boot.xml#L392 const SYSTEMD_KEY_DIR: &str = "loader/keys"; +/// Redirect bootctl's entry-token write into a tmpfs scratch area. +/// +/// bootctl unconditionally writes `/entry-token` +/// during installation. Because systemd's `path_join()` is naive string +/// concatenation (see `src/bootctl/bootctl-install.c`), setting this to +/// `/tmp` causes the write to land at `/tmp/entry-token` +/// on the MountedImageRoot tmpfs, where it is automatically discarded. +/// bootc does not use the entry-token at all. +const KERNEL_INSTALL_CONF_ROOT: &str = "/tmp"; + /// Mount the first ESP found among backing devices at /boot/efi. /// /// This is used by the install-alongside path to clean stale bootloader @@ -218,66 +228,79 @@ pub(crate) fn install_via_bootupd( } } -/// Install systemd-boot to the first ESP found among backing devices. -/// -/// On multi-device setups only the first ESP is installed to; additional -/// ESPs on other backing devices are left untouched. -// TODO: install to all ESPs on multi-device setups +/// Install systemd-boot using a pre-prepared boot root. #[context("Installing bootloader")] pub(crate) fn install_systemd_boot( - device: &bootc_blockdev::Device, - _rootfs: &Utf8Path, + prepared_root: &MountedImageRoot, configopts: &crate::install::InstallConfigOpts, - _deployment_path: Option<&str>, autoenroll: Option, ) -> Result<()> { - let roots = device.find_all_roots()?; - let mut esp_part = None; - for root in &roots { - if let Some(esp) = root.find_partition_of_esp_optional()? { - esp_part = Some(esp); - break; - } - } - let esp_part = esp_part.ok_or_else(|| anyhow::anyhow!("ESP partition not found"))?; - - let esp_mount = mount_esp(&esp_part.path()).context("Mounting ESP")?; - let esp_path = Utf8Path::from_path(esp_mount.dir.path()) - .ok_or_else(|| anyhow::anyhow!("Failed to convert ESP mount path to UTF-8"))?; - println!("Installing bootloader via systemd-boot"); - let mut bootctl_args = vec!["install", "--esp-path", esp_path.as_str()]; + // We use the --root of the mounted target root, so we have the right /etc/os-release. + let root_path = prepared_root + .root_path() + .to_str() + .ok_or_else(|| anyhow::anyhow!("composefs tmpdir path is not UTF-8"))?; + let esp_path_in_root = format!("/{}", prepared_root.esp_subdir); + + let mut bootctl_args = vec![ + "install", + "--root", + root_path, + "--esp-path", + esp_path_in_root.as_str(), + // If we supported XBOOTLDR in the future, that'd go here with --boot-path. + ]; if configopts.generic_image { - bootctl_args.extend(["--random-seed", "no"]); + bootctl_args.extend(["--random-seed", "no", "--no-variables"]); } Command::new("bootctl") .args(bootctl_args) + // Skip partition-type GUID validation because e.g. osbuild + // may not provide the udev database. + .env("SYSTEMD_RELAX_ESP_CHECKS", "1") + // bootc doesn't use the entry-token file, but bootctl still tries to + // write it. Redirect into /tmp (a tmpfs mounted by MountedImageRoot) + // so the write succeeds and is automatically discarded. + .env("KERNEL_INSTALL_CONF_ROOT", KERNEL_INSTALL_CONF_ROOT) .log_debug() - .run_inherited_with_cmd_context()?; + // Capture stderr so bootctl error messages appear in our error chain. + .run_capture_stderr()?; if let Some(SecurebootKeys { dir, keys }) = autoenroll { - let path = esp_path.join(SYSTEMD_KEY_DIR); - create_dir_all(&path)?; - - let keys_dir = esp_mount - .fd + let esp_dir = prepared_root.open_esp_dir()?; + let keys_path = prepared_root + .root_path() + .join(prepared_root.esp_subdir) + .join(SYSTEMD_KEY_DIR); + create_dir_all(&keys_path).with_context(|| { + format!("Creating secureboot key directory {}", keys_path.display()) + })?; + + let keys_dir = esp_dir .open_dir(SYSTEMD_KEY_DIR) - .with_context(|| format!("Opening {path}"))?; + .with_context(|| format!("Opening {SYSTEMD_KEY_DIR}"))?; for filename in keys.iter() { - let p = path.join(&filename); - - // create directory if it doesn't already exist - if let Some(parent) = p.parent() { - create_dir_all(parent)?; + // Each key lives in a subdirectory, e.g. "PK/PK.auth". + // Create the per-key subdirectory before copying the file into it. + if let Some(parent) = filename.parent() { + if !parent.as_str().is_empty() { + keys_dir + .create_dir_all(parent) + .with_context(|| format!("Creating key subdirectory {parent}"))?; + } } - - dir.copy(&filename, &keys_dir, &filename) - .with_context(|| format!("Copying secure boot key: {p}"))?; - println!("Wrote Secure Boot key: {p}"); + dir.copy(filename, &keys_dir, filename) + .with_context(|| format!("Copying secure boot key {filename:?}"))?; + println!( + "Wrote Secure Boot key: {}/{}", + keys_path.display(), + filename.as_str() + ); } if keys.is_empty() { tracing::debug!("No Secure Boot keys provided for systemd-boot enrollment"); diff --git a/crates/mount/src/tempmount.rs b/crates/mount/src/tempmount.rs index a72571ad4..afd5dd527 100644 --- a/crates/mount/src/tempmount.rs +++ b/crates/mount/src/tempmount.rs @@ -1,4 +1,5 @@ use std::os::fd::AsFd; +use std::path::Path; use anyhow::{Context, Result}; @@ -7,6 +8,47 @@ use cap_std_ext::cap_std::{ambient_authority, fs::Dir}; use fn_error_context::context; use rustix::mount::{MountFlags, MoveMountFlags, UnmountFlags, move_mount, unmount}; +/// RAII guard that synchronously unmounts a path on drop, flushing all writes. +/// +/// Prefer this over `MNT_DETACH` when the mounted filesystem has received +/// writes (e.g. FAT ESP) and you need them flushed before the guard drops. +#[derive(Debug)] +pub struct MountGuard(std::path::PathBuf); + +impl MountGuard { + /// Mount `dev` at `path` and return a guard that will synchronously + /// unmount it on drop. + pub fn mount( + dev: &str, + path: std::path::PathBuf, + fstype: &str, + flags: MountFlags, + data: Option<&std::ffi::CStr>, + ) -> Result { + rustix::mount::mount(dev, &path, fstype, flags, data) + .with_context(|| format!("Mounting {} at {}", dev, path.display()))?; + Ok(Self(path)) + } +} + +impl std::ops::Deref for MountGuard { + type Target = Path; + fn deref(&self) -> &Path { + &self.0 + } +} + +impl Drop for MountGuard { + fn drop(&mut self) { + if let Err(e) = unmount(&self.0, UnmountFlags::empty()) { + // Synchronous unmount failure may mean buffered writes were not + // flushed to the underlying device (e.g. FAT ESP). Treat this as + // an error rather than a warning. + tracing::error!("Failed to unmount {}: {e:?}", self.0.display()); + } + } +} + /// RAII wrapper for a temporary mount that is automatically unmounted on drop. #[derive(Debug)] pub struct TempMount {