From aa1b1676cb6e2ef663e8357b7a4fe18a976176b1 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 5 Feb 2026 11:51:00 +0000 Subject: [PATCH] Fix SFTP server hang on WS_WANT_WRITE with non-blocking sockets When wolfSSH_SFTP_buffer_send() called wolfSSH_stream_send(), the data would be consumed into the SSH output buffer even if the underlying socket returned EWOULDBLOCK/EAGAIN. SendChannelData() returns the positive dataSz on WS_WANT_WRITE, causing the SFTP layer to advance its buffer index as if the data was sent. The SSH output buffer still had pending data that was never flushed, leading to an indefinite hang. Fix: At the start of wolfSSH_SFTP_buffer_send(), check if there's pending data in ssh->outputBuffer from a previous WS_WANT_WRITE. If so, attempt to flush it first and return WS_WANT_WRITE if the flush fails. This ensures the caller retries until all pending data is sent. Also expose WS_SFTP_BUFFER and wolfSSH_SFTP_buffer_send() as WOLFSSH_LOCAL for unit testing, and add regression test that verifies the fix catches the bug. Fixes ZD 21157 --- .github/workflows/paramiko-sftp-test.yml | 68 +++++++++++++++--- src/wolfsftp.c | 30 ++++++-- tests/regress.c | 90 ++++++++++++++++++++++++ wolfssh/wolfsftp.h | 10 +++ 4 files changed, 183 insertions(+), 15 deletions(-) diff --git a/.github/workflows/paramiko-sftp-test.yml b/.github/workflows/paramiko-sftp-test.yml index 542eba102..d27469823 100644 --- a/.github/workflows/paramiko-sftp-test.yml +++ b/.github/workflows/paramiko-sftp-test.yml @@ -132,12 +132,29 @@ jobs: import os import time import sys - + import hashlib + import signal + + # Timeout handler for detecting hangs + class TimeoutError(Exception): + pass + + def timeout_handler(signum, frame): + raise TimeoutError("SFTP operation timed out - possible hang detected!") + + def get_file_hash(filepath): + """Calculate MD5 hash of a file.""" + hash_md5 = hashlib.md5() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + def run_sftp_test(): # Create SSH client ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - + # Connect to server using password authentication with testuser print("Connecting to wolfSSHd server...") try: @@ -145,32 +162,67 @@ jobs: except Exception as e: print(f"Connection error: {e}") raise - + # Open SFTP session print("Opening SFTP session...") sftp = ssh.open_sftp() - + # Upload test print("Uploading 20MB test file...") start_time = time.time() sftp.put('/tmp/sftp_upload/test_upload.dat', '/tmp/test_upload.dat') upload_time = time.time() - start_time print(f"Upload completed in {upload_time:.2f} seconds") - + # Download test print("Downloading 20MB test file...") start_time = time.time() sftp.get('/tmp/test_upload.dat', '/tmp/sftp_download/test_download.dat') download_time = time.time() - start_time print(f"Download completed in {download_time:.2f} seconds") - + + # Stress test: Repeated GET operations with prefetch + # This tests the WS_WANT_WRITE handling during repeated transfers + # which was the original bug trigger (SFTP hang on non-blocking sockets) + print("\n=== Starting repeated GET stress test (prefetch enabled) ===") + num_iterations = 10 + timeout_seconds = 30 # Per-operation timeout to detect hangs + + for i in range(num_iterations): + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(timeout_seconds) + try: + download_path = f'/tmp/sftp_download/stress_test_{i}.dat' + start_time = time.time() + # Paramiko uses prefetch by default for get() + sftp.get('/tmp/test_upload.dat', download_path) + elapsed = time.time() - start_time + signal.alarm(0) # Cancel alarm + + # Verify integrity + orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat') + down_hash = get_file_hash(download_path) + if orig_hash != down_hash: + print(f" Iteration {i+1}: FAILED - hash mismatch!") + return False + + print(f" Iteration {i+1}/{num_iterations}: OK ({elapsed:.2f}s)") + os.remove(download_path) # Cleanup + + except TimeoutError as e: + print(f" Iteration {i+1}: FAILED - {e}") + print(" This may indicate the WS_WANT_WRITE hang bug!") + return False + + print(f"=== Stress test completed: {num_iterations} iterations OK ===\n") + # Close connections sftp.close() ssh.close() - + print("SFTP session closed") return True - + if __name__ == "__main__": try: success = run_sftp_test() diff --git a/src/wolfsftp.c b/src/wolfsftp.c index 9386d157e..8c9f2b916 100644 --- a/src/wolfsftp.c +++ b/src/wolfsftp.c @@ -145,13 +145,8 @@ enum WS_SFTP_LSTAT_STATE_ID { STATE_LSTAT_CLEANUP }; -/* This structure is to help with nonblocking and keeping track of state. +/* WS_SFTP_BUFFER is defined in wolfsftp.h for use with nonblocking state. * If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */ -typedef struct WS_SFTP_BUFFER { - byte* data; - word32 sz; - word32 idx; -} WS_SFTP_BUFFER; typedef struct WS_SFTP_CHMOD_STATE { enum WS_SFTP_CHMOD_STATE_ID state; @@ -513,7 +508,7 @@ static void wolfSSH_SFTP_buffer_rewind(WS_SFTP_BUFFER* buffer) /* try to send the rest of the buffer (buffer.sz - buffer.idx) * increments idx with amount sent */ -static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) +WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) { int ret = WS_SUCCESS; int err; @@ -526,6 +521,19 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) return WS_BUFFER_E; } + /* Flush any pending data in SSH output buffer first. + * Handles case where previous send returned WS_WANT_WRITE + * and data is still buffered at the SSH layer. */ + if (ssh->outputBuffer.length > ssh->outputBuffer.idx) { + ret = wolfSSH_SendPacket(ssh); + if (ret == WS_WANT_WRITE) { + return ret; + } + if (ret < 0) { + return ret; + } + } + /* Call wolfSSH worker if rekeying or adjusting window size */ err = wolfSSH_get_error(ssh); if (err == WS_WINDOW_FULL || err == WS_REKEYING) { @@ -1603,6 +1611,14 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh) ssh->error = WS_WANT_WRITE; return WS_FATAL_ERROR; } + /* Check if SSH layer still has pending data from WS_WANT_WRITE. + * Even if SFTP buffer is fully consumed, the data may still be + * sitting in the SSH output buffer waiting to be sent. */ + if (ssh->error == WS_WANT_WRITE || + ssh->outputBuffer.length > ssh->outputBuffer.idx) { + ssh->error = WS_WANT_WRITE; + return WS_FATAL_ERROR; + } ret = WS_SUCCESS; state->toSend = 0; } diff --git a/tests/regress.c b/tests/regress.c index 5f6bcf170..69c24b627 100644 --- a/tests/regress.c +++ b/tests/regress.c @@ -40,6 +40,9 @@ #include #include #include +#ifdef WOLFSSH_SFTP + #include +#endif #include "apps/wolfssh/common.h" #ifndef WOLFSSH_NO_ABORT @@ -482,6 +485,89 @@ static void TestWorkerReadsWhenSendWouldBlock(void) #endif +#ifdef WOLFSSH_SFTP +/* Test that wolfSSH_SFTP_buffer_send() properly handles WS_WANT_WRITE when + * SSH output buffer has pending data. This is a regression test for + * the SFTP hang issue with non-blocking sockets. + * + * The fix checks for pending data in ssh->outputBuffer at the start of + * wolfSSH_SFTP_buffer_send() and returns WS_WANT_WRITE if the flush fails. */ +static int sftpWantWriteCallCount = 0; + +static int SftpWantWriteSendCb(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)ctx; + sftpWantWriteCallCount++; + /* First call returns WANT_WRITE, subsequent calls succeed */ + if (sftpWantWriteCallCount == 1) { + return WS_CBIO_ERR_WANT_WRITE; + } + return (int)sz; +} + +static int SftpDummyRecv(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)sz; (void)ctx; + return WS_CBIO_ERR_WANT_READ; +} + +static void TestSftpBufferSendPendingOutput(void) +{ + WOLFSSH_CTX* ctx; + WOLFSSH* ssh; + WS_SFTP_BUFFER sftpBuf; + byte testData[16]; + int ret; + + ctx = wolfSSH_CTX_new(WOLFSSH_ENDPOINT_SERVER, NULL); + AssertNotNull(ctx); + + wolfSSH_SetIOSend(ctx, SftpWantWriteSendCb); + wolfSSH_SetIORecv(ctx, SftpDummyRecv); + + ssh = wolfSSH_new(ctx); + AssertNotNull(ssh); + + /* Set up SFTP buffer with some data to send */ + WMEMSET(testData, 0x42, sizeof(testData)); + WMEMSET(&sftpBuf, 0, sizeof(sftpBuf)); + sftpBuf.data = testData; + sftpBuf.sz = sizeof(testData); + sftpBuf.idx = 0; + + /* Simulate pending data in SSH output buffer (as if previous send + * returned WS_WANT_WRITE and data was buffered). + * Note: outputBuffer is initialized by BufferInit() with bufferSz set + * to at least STATIC_BUFFER_LEN (16 bytes), so we use a smaller value. */ + ssh->outputBuffer.length = 8; /* 8 bytes pending */ + ssh->outputBuffer.idx = 0; /* none sent yet */ + + sftpWantWriteCallCount = 0; + + /* Call wolfSSH_SFTP_buffer_send - should return WS_WANT_WRITE because + * the fix detects pending data in outputBuffer and tries to flush it, + * which fails with WS_WANT_WRITE from our callback. + * + * Before the fix, the function would ignore the pending SSH output buffer + * data and proceed to send new SFTP data, leading to a hang because the + * pending data was never flushed. */ + ret = wolfSSH_SFTP_buffer_send(ssh, &sftpBuf); + AssertIntEQ(ret, WS_WANT_WRITE); + + /* Verify the SFTP buffer was NOT consumed (idx should still be 0). + * This is critical - the SFTP layer must not advance its buffer index + * until the SSH output buffer is flushed. */ + AssertIntEQ(sftpBuf.idx, 0); + + /* Verify the SSH output buffer still has pending data */ + AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx); + + wolfSSH_free(ssh); + wolfSSH_CTX_free(ctx); +} +#endif /* WOLFSSH_SFTP */ + + int main(int argc, char** argv) { WOLFSSH_CTX* ctx; @@ -515,6 +601,10 @@ int main(int argc, char** argv) TestWorkerReadsWhenSendWouldBlock(); #endif +#ifdef WOLFSSH_SFTP + TestSftpBufferSendPendingOutput(); +#endif + /* TODO: add app-level regressions that simulate stdin EOF/password * prompts and mid-session socket closes once the test harness can * drive the wolfssh client without real sockets/tty. */ diff --git a/wolfssh/wolfsftp.h b/wolfssh/wolfsftp.h index f2084524e..1e8b9a40d 100644 --- a/wolfssh/wolfsftp.h +++ b/wolfssh/wolfsftp.h @@ -173,6 +173,13 @@ struct WS_SFTPNAME { #define WOLFSSH_MAX_SFTP_RECV 32768 #endif +/* SFTP buffer for nonblocking state tracking */ +typedef struct WS_SFTP_BUFFER { + byte* data; + word32 sz; + word32 idx; +} WS_SFTP_BUFFER; + /* functions for establishing a connection */ WOLFSSH_API int wolfSSH_SFTP_accept(WOLFSSH* ssh); WOLFSSH_API int wolfSSH_SFTP_connect(WOLFSSH* ssh); @@ -282,6 +289,9 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle, WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void); +/* SFTP buffer send - exposed for testing */ +WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer); + #ifdef __cplusplus } #endif