From eafa47ea26f6a4d560625f168b7987e02dc610dd Mon Sep 17 00:00:00 2001 From: Abhinav Agarwal Date: Mon, 18 May 2026 22:45:33 -0700 Subject: [PATCH] ci: add Valgrind memcheck job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add valgrind-memcheck job running full pytest suite under memcheck with continue-on-error: true - Uses --error-exitcode=99 with definite/indirect leak checking - Currently finding real sshfs-owned leaks (buf_init/sftp_read, buf_init/sftp_send_iov, cache_add_link) — will go green once those are fixed - Raw Valgrind logs written via --log-file and uploaded as artifacts alongside JUnit XML - Add VALGRIND_OPTIONS support to test/util.py via shlex.split - Suppression file covers only third-party libfuse worker-thread teardown leaks - Fix fusermount/fusermount3 mismatch, scale timeouts 4x under Valgrind - Set G_DEBUG/G_SLICE for cleaner Valgrind output - Hard-fail FUSE preflight, workflow-level permissions and concurrency - All actions pinned to Node 24-capable SHAs, runner pinned to ubuntu-24.04 --- .github/workflows/build-ubuntu.yml | 88 ++++++++++++++++++++++++++++-- test/conftest.py | 14 +++++ test/util.py | 25 +++++++-- test/valgrind.supp | 25 +++++++++ 4 files changed, 141 insertions(+), 11 deletions(-) create mode 100644 test/valgrind.supp diff --git a/.github/workflows/build-ubuntu.yml b/.github/workflows/build-ubuntu.yml index a6eb30df..51ce94d6 100644 --- a/.github/workflows/build-ubuntu.yml +++ b/.github/workflows/build-ubuntu.yml @@ -7,8 +7,19 @@ on: workflow_dispatch: # this is a nice option that will enable a button w/ inputs inputs: git-ref: - description: Git Ref (Optional) + description: Git Ref (Optional) required: false + + schedule: + - cron: '0 5 * * 1' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: build-and-test: name: Build and test @@ -22,16 +33,15 @@ jobs: - 
name: Install build dependencies run: | sudo apt-get update - sudo apt-get install valgrind gcc ninja-build meson libglib2.0-dev libfuse3-dev + sudo apt-get install valgrind gcc ninja-build libglib2.0-dev libfuse3-dev - name: Install meson run: pip3 install meson pytest - name: build run: | - mkdir build; cd build - meson .. - ninja + meson setup build + ninja -C build # cd does not persist across steps - name: upload build artifact @@ -49,3 +59,71 @@ jobs: run: | cd build python3 -m pytest test/ + + valgrind-memcheck: + name: Valgrind memcheck + runs-on: ubuntu-24.04 + timeout-minutes: 35 + continue-on-error: true + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y valgrind gcc ninja-build libglib2.0-dev libfuse3-dev openssh-client openssh-server fuse3 + + - name: Install Python dependencies + run: pip3 install meson pytest pytest-timeout + + - name: Build sshfs + run: | + meson setup build + ninja -C build + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + chmod 700 ~/.ssh + ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" + cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys + chmod 600 ~/.ssh/authorized_keys + sudo systemctl start ssh || sudo service ssh start + ssh -o StrictHostKeyChecking=no -o BatchMode=yes localhost true + + - name: Check FUSE availability + run: | + test -e /dev/fuse + command -v fusermount3 + + - name: Create Valgrind log directory + run: mkdir -p valgrind-logs + + - name: Run tests under Valgrind memcheck + timeout-minutes: 30 + env: + TEST_WITH_VALGRIND: "true" + VALGRIND_OPTIONS: "--tool=memcheck --leak-check=full --show-leak-kinds=definite,indirect --errors-for-leak-kinds=definite,indirect --error-exitcode=99 --num-callers=25 --suppressions=${{ 
github.workspace }}/test/valgrind.supp --log-file=${{ github.workspace }}/valgrind-logs/memcheck.%p.log -q" + G_DEBUG: "fatal-warnings,gc-friendly" + G_SLICE: "always-malloc" + run: | + cd build + python3 -m pytest -q --tb=short --maxfail=99 --timeout=300 \ + test/ \ + --junitxml=test-results-valgrind.xml + + - name: Upload test results + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: test-results-valgrind + path: | + build/test-results-valgrind.xml + build/meson-logs/ + valgrind-logs/ diff --git a/test/conftest.py b/test/conftest.py index 9416dde4..44fca6e8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,4 +1,5 @@ import sys +import os import pytest import time import re @@ -85,6 +86,13 @@ def register_output(self, pattern, count=1, flags=re.MULTILINE): current_capfd = None +_running_with_valgrind = os.environ.get("TEST_WITH_VALGRIND", "no").lower().strip() not in ( + "no", + "false", + "0", +) + + @pytest.fixture(autouse=True) def save_cap_fixtures(request, capfd): global current_capfd @@ -93,6 +101,12 @@ def save_cap_fixtures(request, capfd): # Monkeypatch in a function to register false positives type(capfd).register_output = register_output + # When running under Valgrind, its ==pid== summary lines on stderr are + # expected. Register them as false positives so check_test_output does + # not mistake them for suspicious output. 
+ if _running_with_valgrind: + capfd.false_positives.append((r"^==[0-9]+==[^\n]*\n", re.MULTILINE, 0)) + if request.config.getoption("capture") == "no": capfd = None current_capfd = capfd diff --git a/test/util.py b/test/util.py index ce443899..b495155b 100644 --- a/test/util.py +++ b/test/util.py @@ -9,6 +9,14 @@ basename = pjoin(os.path.dirname(__file__), "..") +_valgrind_timeout_multiplier = ( + 4 + if os.environ.get("TEST_WITH_VALGRIND", "no").lower().strip() + not in ("no", "false", "0") + else 1 +) +_mount_timeout = 30 * _valgrind_timeout_multiplier + def os_create(name): os.close(os.open(name, os.O_CREAT | os.O_RDWR)) @@ -25,7 +33,7 @@ def os_open(name, flags): def wait_for_mount(mount_process, mnt_dir, test_fn=os.path.ismount): elapsed = 0 - while elapsed < 30: + while elapsed < _mount_timeout: if test_fn(mnt_dir): return True if mount_process.poll() is not None: @@ -37,7 +45,7 @@ def wait_for_mount(mount_process, mnt_dir, test_fn=os.path.ismount): def cleanup(mount_process, mnt_dir): subprocess.call( - ["fusermount", "-z", "-u", mnt_dir], + ["fusermount3", "-z", "-u", mnt_dir], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, ) @@ -55,7 +63,7 @@ def umount(mount_process, mnt_dir): # Give mount process a little while to terminate. Popen.wait(timeout) # was only added in 3.3... 
elapsed = 0 - while elapsed < 30: + while elapsed < _mount_timeout: code = mount_process.poll() if code is not None: if code == 0: @@ -93,12 +101,12 @@ def skip(reason: str): return pytest.mark.skip(reason=reason) with subprocess.Popen( - ["which", "fusermount"], stdout=subprocess.PIPE, universal_newlines=True + ["which", "fusermount3"], stdout=subprocess.PIPE, universal_newlines=True ) as which: fusermount_path = which.communicate()[0].strip() if not fusermount_path or which.returncode != 0: - return skip("Can't find fusermount executable") + return skip("Can't find fusermount3 executable") if not os.path.exists("/dev/fuse"): return skip("FUSE kernel module does not seem to be loaded") @@ -126,6 +134,11 @@ def skip(reason: str): "false", "0", ): - base_cmdline = ["valgrind", "-q", "--"] + import shlex + valgrind_options_env = os.environ.get("VALGRIND_OPTIONS", "") + if valgrind_options_env: + base_cmdline = ["valgrind"] + shlex.split(valgrind_options_env) + ["--"] + else: + base_cmdline = ["valgrind", "-q", "--"] else: base_cmdline = [] diff --git a/test/valgrind.supp b/test/valgrind.supp new file mode 100644 index 00000000..665316ec --- /dev/null +++ b/test/valgrind.supp @@ -0,0 +1,25 @@ +# Valgrind suppression file for sshfs tests. +# +# Keep this file minimal. Only suppress stacks that originate entirely in +# third-party libraries (GLib, libfuse, glibc, pthreads) and have been +# confirmed as false positives or benign teardown noise. +# +# Do NOT suppress any stack frame that includes sshfs.c or cache.c unless +# there is a documented upstream false positive with a linked note below. +# +# To generate candidates locally (test/util.py appends the "--" separator): +# TEST_WITH_VALGRIND=true VALGRIND_OPTIONS="--tool=memcheck --leak-check=full \ +# --gen-suppressions=all -q" python3 -m pytest test/test_sshfs.py ... + +# libfuse allocates thread-local or worker-thread state inside its shared +# library during pthread_create. 
These allocations become unreachable once the threads +# exit and are never explicitly freed — benign libfuse teardown leaks, not +# sshfs bugs. NOTE(review): "..." matches any frames, so calloc in an sshfs callback run by libfuse3 is suppressed too — confirm/tighten. +{ + libfuse-worker-thread-alloc + Memcheck:Leak + match-leak-kinds: definite,indirect + fun:calloc + ... + obj:*/libfuse3.so* +}