Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 206 additions & 63 deletions driver/luks_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@ limitations under the License.
package driver

import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"

"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -117,7 +120,7 @@ func getLuksContext(secrets map[string]string, context map[string]string, lifecy
}
}

func luksFormat(source string, mkfsCmd string, mkfsArgs []string, ctx LuksContext, log *logrus.Entry) error {
func luksFormat(source string, mkfsCmd string, mkfsArgs []string, ctx LuksContext, log *logrus.Entry) (err error) {
cryptsetupCmd, err := getCryptsetupCmd()
if err != nil {
return err
Expand All @@ -128,8 +131,7 @@ func luksFormat(source string, mkfsCmd string, mkfsArgs []string, ctx LuksContex
}

defer func() {
e := os.Remove(filename)
if e != nil {
if e := os.Remove(filename); e != nil {
log.Errorf("cannot delete temporary file %s: %s", filename, e.Error())
}
}()
Expand All @@ -156,21 +158,22 @@ func luksFormat(source string, mkfsCmd string, mkfsArgs []string, ctx LuksContex
err, cryptsetupCmd, strings.Join(cryptsetupArgs, " "), string(out))
}

// format the disk with the desired filesystem

// open the luks partition and set up a mapping
err = luksOpen(source, filename, ctx, log)
opened, err := luksOpen(source, filename, ctx, log)
if err != nil {
return fmt.Errorf("cryptsetup luksOpen failed: %v cmd: '%s %s' output: %q",
err, cryptsetupCmd, strings.Join(cryptsetupArgs, " "), string(out))
return fmt.Errorf("luksOpen during format failed: %w", err)
}

defer func() {
e := luksClose(ctx.VolumeName, log)
if e != nil {
log.Errorf("cannot close luks device: %s", e.Error())
}
}()
if opened {
defer func() {
if e := luksClose(ctx.VolumeName, log); e != nil {
log.Errorf("cannot close luks device: %s", e.Error())
if err == nil {
err = fmt.Errorf("luksClose after format failed: %w", e)
}
}
}()
}

// replace the source volume with the mapped one in the arguments to mkfs
mkfsNewArgs := make([]string, len(mkfsArgs))
Expand All @@ -187,10 +190,10 @@ func luksFormat(source string, mkfsCmd string, mkfsArgs []string, ctx LuksContex
"args": mkfsArgs,
}).Info("executing format command")

out, err = exec.Command(mkfsCmd, mkfsArgs...).CombinedOutput()
mkfsOut, err := exec.Command(mkfsCmd, mkfsArgs...).CombinedOutput()
if err != nil {
return fmt.Errorf("formatting disk failed: %v cmd: '%s %s' output: %q",
err, mkfsCmd, strings.Join(mkfsArgs, " "), string(out))
err, mkfsCmd, strings.Join(mkfsArgs, " "), string(mkfsOut))
}

return nil
Expand All @@ -203,14 +206,13 @@ func luksPrepareMount(source string, ctx LuksContext, log *logrus.Entry) (string
return "", err
}
defer func() {
err := os.Remove(filename)
if err != nil {
if err := os.Remove(filename); err != nil {
log.Errorf("cannot delete temporary file %s: %s", filename, err.Error())
}
}()

err = luksOpen(source, filename, ctx, log)
if err != nil {
// The mapping is intentionally kept open until NodeUnstageVolume.
if _, err := luksOpen(source, filename, ctx, log); err != nil {
return "", err
}
return "/dev/mapper/" + ctx.VolumeName, nil
Expand Down Expand Up @@ -238,7 +240,7 @@ func luksClose(volume string, log *logrus.Entry) error {

// checks if the given volume is formatted by checking if it is a luks volume and
// if the luks volume, once opened, contains a filesystem
func isLuksVolumeFormatted(volume string, ctx LuksContext, log *logrus.Entry) (bool, error) {
func isLuksVolumeFormatted(volume string, ctx LuksContext, log *logrus.Entry) (formatted bool, err error) {
isLuks, err := isLuks(volume)
if err != nil {
return false, err
Expand All @@ -252,38 +254,67 @@ func isLuksVolumeFormatted(volume string, ctx LuksContext, log *logrus.Entry) (b
return false, err
}
defer func() {
e := os.Remove(filename)
if e != nil {
if e := os.Remove(filename); e != nil {
log.Errorf("cannot delete temporary file %s: %s", filename, e.Error())
}
}()

err = luksOpen(volume, filename, ctx, log)
opened, err := luksOpen(volume, filename, ctx, log)
if err != nil {
return false, err
}
defer func() {
e := luksClose(ctx.VolumeName, log)
if e != nil {
log.Errorf("cannot close luks device: %s", e.Error())
}
}()
if opened {
defer func() {
if e := luksClose(ctx.VolumeName, log); e != nil {
log.Errorf("cannot close luks device: %s", e.Error())
if err == nil {
err = fmt.Errorf("luksClose after format check failed: %w", e)
}
}
}()
}

return isVolumeFormatted(volume, log)
}

func luksOpen(volume string, keyFile string, ctx LuksContext, log *logrus.Entry) error {
// check if the luks volume is already open
if _, err := os.Stat("/dev/mapper/" + ctx.VolumeName); !os.IsNotExist(err) {
log.WithFields(logrus.Fields{
"volume": volume,
}).Info("luks volume is already open")
return nil
// luksOpen ensures that /dev/mapper/<ctx.VolumeName> exists and is backed by
// the device referenced by `volume`. The boolean return value reports whether
// this call actually opened the mapping (true) or found an existing mapping
// it validated and reused (false). Callers that registered a deferred
// luksClose should gate it on this flag so they do not close a mapping they
// did not open.
func luksOpen(volume string, keyFile string, ctx LuksContext, log *logrus.Entry) (bool, error) {
mapperPath := "/dev/mapper/" + ctx.VolumeName
if _, statErr := os.Stat(mapperPath); statErr == nil {
// A mapping with this name already exists. Confirm that it is
// backed by the device we just resolved before reusing it.
// Without this check, a stale mapping left over from a
// previously-attached cloudscale volume can end up mounted over a
// freshly attached volume. Once two staging paths share one
// device-mapper minor, GetDeviceMountRefs refuses every subsequent
// unstage and the node accumulates unrecoverable state.
inactive, backing, err := validateExistingLuksMapping(ctx.VolumeName, volume, cryptsetupStatus)
if err != nil {
return false, err
}
if inactive {
// The mapping was closed between Stat and status (another
// goroutine raced us). Fall through to open it fresh.
log.WithField("volume", volume).Info("luks mapping vanished between stat and status, opening fresh")
} else {
log.WithFields(logrus.Fields{
"volume": volume,
"backing": backing,
}).Info("luks volume is already open and backing device matches")
return false, nil
}
} else if !os.IsNotExist(statErr) {
return false, fmt.Errorf("stat %s: %w", mapperPath, statErr)
}

cryptsetupCmd, err := getCryptsetupCmd()
if err != nil {
return err
return false, err
}
cryptsetupArgs := []string{
"--batch-mode",
Expand All @@ -297,22 +328,31 @@ func luksOpen(volume string, keyFile string, ctx LuksContext, log *logrus.Entry)
}).Info("executing cryptsetup luksOpen command")
out, err := exec.Command(cryptsetupCmd, cryptsetupArgs...).CombinedOutput()
if err != nil {
return fmt.Errorf("cryptsetup luksOpen failed: %v cmd: '%s %s' output: %q",
return false, fmt.Errorf("cryptsetup luksOpen failed: %v cmd: '%s %s' output: %q",
err, cryptsetupCmd, strings.Join(cryptsetupArgs, " "), string(out))
}
return nil
return true, nil
}

// runs cryptsetup resize for a given volume (/dev/mapper/pvc-xyz)
func luksResize(volume string) error {
func luksResize(volume string, log *logrus.Entry) error {
cryptsetupCmd, err := getCryptsetupCmd()
if err != nil {
return err
}
cryptsetupArgs := []string{"--batch-mode", "resize", volume}

_, err = exec.Command(cryptsetupCmd, cryptsetupArgs...).CombinedOutput()
return err
log.WithFields(logrus.Fields{
"cmd": cryptsetupCmd,
"args": cryptsetupArgs,
}).Info("executing cryptsetup resize command")

out, err := exec.Command(cryptsetupCmd, cryptsetupArgs...).CombinedOutput()
if err != nil {
return fmt.Errorf("cryptsetup resize failed: %v cmd: '%s %s' output: %q",
err, cryptsetupCmd, strings.Join(cryptsetupArgs, " "), string(out))
}
return nil
}

// runs cryptsetup isLuks for a given volume
Expand All @@ -332,38 +372,141 @@ func isLuks(volume string) (bool, error) {
return true, nil
}

// check is a given mapping under /dev/mapper is a luks volume
func isLuksMapping(volume string) (bool, string, error) {
if strings.HasPrefix(volume, "/dev/mapper/") {
mappingName := volume[len("/dev/mapper/"):]
cryptsetupCmd, err := getCryptsetupCmd()
if err != nil {
return false, mappingName, err
}
cryptsetupArgs := []string{"status", mappingName}
// cryptsetupStatusInfo holds the fields we parse from `cryptsetup status <name>`.
type cryptsetupStatusInfo struct {
backing string
isLuks bool
isInactive bool
}

out, err := exec.Command(cryptsetupCmd, cryptsetupArgs...).CombinedOutput()
if err != nil {
return false, mappingName, nil
// parseCryptsetupStatus extracts the LUKS-ness, backing device, and
// active/inactive state from the output of `cryptsetup status <name>`.
// The first line is the state header, always one of:
//
// "/dev/mapper/<name> is inactive."
// "/dev/mapper/<name> is active and is in use."
// "/dev/mapper/<name> is active."
//
// Subsequent lines (active case only) are " key: value", parsed in the
// loop below.
func parseCryptsetupStatus(out []byte) cryptsetupStatusInfo {
var info cryptsetupStatusInfo
scanner := bufio.NewScanner(bytes.NewReader(out))
if !scanner.Scan() {
return info
}
if strings.Contains(scanner.Text(), " is inactive") {
info.isInactive = true
return info
}
for scanner.Scan() {
key, value, ok := strings.Cut(scanner.Text(), ":")
if !ok {
continue
}
for _, statusLine := range strings.Split(string(out), "\n") {
if strings.Contains(statusLine, "type:") {
if strings.Contains(strings.ToLower(statusLine), "luks") {
return true, mappingName, nil
}
return false, mappingName, nil
switch strings.TrimSpace(key) {
case "type":
if strings.Contains(strings.ToLower(value), "luks") {
info.isLuks = true
}
case "device":
info.backing = strings.TrimSpace(value)
}
}
return info
}

// validateExistingLuksMapping confirms that the existing /dev/mapper/<mapperName>
// is backed by the same block device as `volume`. It returns:
// - (true, "", nil) when the mapping is no longer active (a concurrent
// close raced the stat); the caller should re-open.
// - (false, backing, nil) when the mapping is safe to reuse; `backing` is
// the device path as reported by cryptsetup (useful for logging).
// - (false, "", err) when cryptsetup status fails, the mapping is
// active but reports no backing device, or the backing device does not
// match the resolved `volume`.
//
// The statusFn parameter exists so tests can substitute a fake without
// actually shelling out to cryptsetup.
func validateExistingLuksMapping(
mapperName, volume string,
statusFn func(string) (cryptsetupStatusInfo, error),
) (isInactive bool, backing string, err error) {
info, err := statusFn(mapperName)
if err != nil {
return false, "", fmt.Errorf("luks mapping %s exists but cryptsetup status failed: %w",
mapperName, err)
}
if info.isInactive {
return true, "", nil
}
if info.backing == "" {
return false, "", fmt.Errorf("luks mapping %s is active but cryptsetup status reported no backing device",
mapperName)
}
expected, err := filepath.EvalSymlinks(volume)
if err != nil {
return false, "", fmt.Errorf("cannot resolve volume %s for luks mapping validation: %w",
volume, err)
}
// Resolve both sides through EvalSymlinks: the cryptsetup we ship today
// canonicalises the device path via realpath() at open time, but a future
// cryptsetup could preserve the caller-supplied symlink. Comparing
// resolved-against-resolved keeps the check correct either way.
backingResolved, evalErr := filepath.EvalSymlinks(info.backing)
if evalErr != nil {
backingResolved = info.backing
}
Comment thread
mweibel marked this conversation as resolved.
if backingResolved != expected {
return false, "", fmt.Errorf("luks mapping %s is backed by %s (resolved: %s), expected %s, refusing to reuse stale mapping",
mapperName, info.backing, backingResolved, expected)
}
return false, info.backing, nil
}

// cryptsetupStatus runs `cryptsetup status <name>` and returns the parsed
// info. A mapping that is reported as inactive (the normal "no such mapping"
// case) is returned with info.isInactive == true and a nil error so callers
// can distinguish it from real failures — mirroring the sentinel pattern in
// ceph-csi's DeviceEncryptionStatus.
func cryptsetupStatus(name string) (cryptsetupStatusInfo, error) {
cryptsetupCmd, err := getCryptsetupCmd()
if err != nil {
return cryptsetupStatusInfo{}, err
}
out, err := exec.Command(cryptsetupCmd, "status", name).CombinedOutput()
info := parseCryptsetupStatus(out)
if err != nil {
if info.isInactive {
return info, nil
}
return info, fmt.Errorf("cryptsetup status %s failed: %v output: %q",
name, err, string(out))
}
return info, nil
}

// check is a given mapping under /dev/mapper is a luks volume
func isLuksMapping(volume string) (bool, string, error) {
if !strings.HasPrefix(volume, "/dev/mapper/") {
return false, "", nil
}
mappingName := volume[len("/dev/mapper/"):]
info, err := cryptsetupStatus(mappingName)
if err != nil {
return false, mappingName, err
}
if info.isInactive {
return false, "", nil
}
return false, "", nil
return info.isLuks, mappingName, nil
}

func getCryptsetupCmd() (string, error) {
cryptsetupCmd := "cryptsetup"
_, err := exec.LookPath(cryptsetupCmd)
if err != nil {
if err == exec.ErrNotFound {
if errors.Is(err, exec.ErrNotFound) {
return "", fmt.Errorf("%q executable not found in $PATH", cryptsetupCmd)
}
return "", err
Expand Down
Loading