diff --git a/Cargo.lock b/Cargo.lock
index e283f0c5557..ddc1dc5b359 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -537,6 +537,7 @@ name = "coreutils"
 version = "0.5.0"
 dependencies = [
  "bincode",
+ "bytecount",
  "chrono",
  "clap",
  "clap_complete",
@@ -563,6 +564,7 @@ dependencies = [
  "tempfile",
  "textwrap",
  "time",
+ "unicode-width 0.2.2",
  "unindent",
  "uu_arch",
  "uu_base32",
diff --git a/Cargo.toml b/Cargo.toml
index b388373a2aa..ac6763ca346 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -535,6 +535,7 @@ ctor.workspace = true
 filetime.workspace = true
 glob.workspace = true
 libc.workspace = true
+bytecount.workspace = true
 num-prime.workspace = true
 pretty_assertions = "1.4.0"
 rand.workspace = true
@@ -542,6 +543,7 @@ regex.workspace = true
 sha1 = { workspace = true, features = ["std"] }
 tempfile.workspace = true
 time = { workspace = true, features = ["local-offset"] }
+unicode-width.workspace = true
 unindent = "0.2.3"
 uutests.workspace = true
 uucore = { workspace = true, features = [
diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index 2eb97933180..00b3a45cc77 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -19,6 +19,8 @@ const TAB_WIDTH: usize = 8;
 const NL: u8 = b'\n';
 const CR: u8 = b'\r';
 const TAB: u8 = b'\t';
+// Implementation threshold (8 KiB) to prevent unbounded buffer growth during streaming.
+const STREAMING_FLUSH_THRESHOLD: usize = 8 * 1024;
 
 mod options {
     pub const BYTES: &str = "bytes";
@@ -288,6 +290,8 @@ fn compute_col_count(buffer: &[u8], mode: WidthMode) -> usize {
 }
 
 fn emit_output<W: Write>(ctx: &mut FoldContext<'_, W>) -> UResult<()> {
+    // Emit a folded line and keep the remaining buffer (if any) for the next line.
+    // When `-s` is active, we prefer breaking at the last recorded whitespace.
     let consume = match *ctx.last_space {
         Some(index) => index + 1,
         None => ctx.output.len(),
@@ -322,6 +326,36 @@ fn emit_output<W: Write>(ctx: &mut FoldContext<'_, W>) -> UResult<()> {
     Ok(())
 }
 
+fn maybe_flush_unbroken_output<W: Write>(ctx: &mut FoldContext<'_, W>) -> UResult<()> {
+    // Bound memory use while streaming: once the pending output reaches
+    // STREAMING_FLUSH_THRESHOLD bytes it is written out and the buffer reset.
+    // With `-s` nothing is flushed here, because `last_space` is an index
+    // into the buffer and folding still needs the bytes around that index.
+    let over_threshold = ctx.output.len() >= STREAMING_FLUSH_THRESHOLD;
+    if ctx.spaces || !over_threshold {
+        return Ok(());
+    }
+    // The threshold is non-zero, so the buffer cannot be empty at this point.
+    ctx.writer.write_all(ctx.output)?;
+    ctx.output.clear();
+    Ok(())
+}
+
+fn push_byte<W: Write>(ctx: &mut FoldContext<'_, W>, byte: u8) -> UResult<()> {
+    // Buffer one byte, then run the streaming flush check on the grown buffer.
+    ctx.output.push(byte);
+    maybe_flush_unbroken_output(ctx)
+}
+
+fn push_bytes<W: Write>(ctx: &mut FoldContext<'_, W>, bytes: &[u8]) -> UResult<()> {
+    // Buffer `bytes` and run the flush check; an empty slice is a no-op.
+    if !bytes.is_empty() {
+        ctx.output.extend_from_slice(bytes);
+        return maybe_flush_unbroken_output(ctx);
+    }
+    Ok(())
+}
+
 fn process_ascii_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UResult<()> {
     let mut idx = 0;
     let len = line.len();
@@ -331,15 +365,15 @@ fn process_ascii_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UR
             NL => {
                 *ctx.last_space = None;
                 emit_output(ctx)?;
-                break;
+                idx += 1;
             }
             CR => {
-                ctx.output.push(CR);
+                push_byte(ctx, CR)?;
                 *ctx.col_count = 0;
                 idx += 1;
             }
             0x08 => {
-                ctx.output.push(0x08);
+                push_byte(ctx, 0x08)?;
                 *ctx.col_count = ctx.col_count.saturating_sub(1);
                 idx += 1;
             }
@@ -358,16 +392,23 @@ fn process_ascii_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) -> UR
                 } else {
                     *ctx.last_space = None;
                 }
-                ctx.output.push(TAB);
+                push_byte(ctx, TAB)?;
                 idx += 1;
             }
             0x00..=0x07 | 0x0B..=0x0C | 0x0E..=0x1F | 0x7F => {
-                ctx.output.push(line[idx]);
+                push_byte(ctx, line[idx])?;
                 if ctx.spaces && line[idx].is_ascii_whitespace() && line[idx] != CR {
                     *ctx.last_space = Some(ctx.output.len() - 1);
                 } else if !ctx.spaces {
                     *ctx.last_space = None;
                 }
+
+                if ctx.mode == WidthMode::Characters {
+                    *ctx.col_count = ctx.col_count.saturating_add(1);
+                    if *ctx.col_count >= ctx.width {
+                        emit_output(ctx)?;
+                    }
+                }
                 idx += 1;
             }
             _ => {
@@ -405,7 +446,7 @@ fn push_ascii_segment<W: Write>(segment: &[u8], ctx: &mut FoldContext<'_, W>) ->
         let take = remaining.len().min(available);
         let base_len = ctx.output.len();
 
-        ctx.output.extend_from_slice(&remaining[..take]);
+        push_bytes(ctx, &remaining[..take])?;
         *ctx.col_count += take;
 
         if ctx.spaces {
@@ -430,16 +471,26 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
         return process_ascii_line(line.as_bytes(), ctx);
     }
 
+    process_utf8_chars(line, ctx)
+}
+
+fn process_utf8_chars<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> UResult<()> {
     let line_bytes = line.as_bytes();
     let mut iter = line.char_indices().peekable();
 
     while let Some((byte_idx, ch)) = iter.next() {
-        // Include combining characters with the base character
-        while let Some(&(_, next_ch)) = iter.peek() {
-            if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 {
-                iter.next();
-            } else {
-                break;
+        // Include combining characters with the base character when we are
+        // measuring by display columns. In character-counting mode every
+        // scalar value must advance the counter to match `chars().count()`
+        // semantics (see `fold_characters_reference` in the tests), so we do
+        // not coalesce zero-width scalars there.
+        if ctx.mode == WidthMode::Columns {
+            while let Some(&(_, next_ch)) = iter.peek() {
+                if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 {
+                    iter.next();
+                } else {
+                    break;
+                }
             }
         }
 
@@ -448,7 +499,7 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
         if ch == '\n' {
             *ctx.last_space = None;
             emit_output(ctx)?;
-            break;
+            continue;
         }
 
         if *ctx.col_count >= ctx.width {
@@ -456,15 +507,13 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
         }
 
         if ch == '\r' {
-            ctx.output
-                .extend_from_slice(&line_bytes[byte_idx..next_idx]);
+            push_bytes(ctx, &line_bytes[byte_idx..next_idx])?;
             *ctx.col_count = 0;
             continue;
         }
 
         if ch == '\x08' {
-            ctx.output
-                .extend_from_slice(&line_bytes[byte_idx..next_idx]);
+            push_bytes(ctx, &line_bytes[byte_idx..next_idx])?;
             *ctx.col_count = ctx.col_count.saturating_sub(1);
             continue;
         }
@@ -484,8 +533,7 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
             } else {
                 *ctx.last_space = None;
             }
-            ctx.output
-                .extend_from_slice(&line_bytes[byte_idx..next_idx]);
+            push_bytes(ctx, &line_bytes[byte_idx..next_idx])?;
             continue;
         }
 
@@ -506,8 +554,7 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
             *ctx.last_space = Some(ctx.output.len());
         }
 
-        ctx.output
-            .extend_from_slice(&line_bytes[byte_idx..next_idx]);
+        push_bytes(ctx, &line_bytes[byte_idx..next_idx])?;
         *ctx.col_count = ctx.col_count.saturating_add(added);
     }
 
@@ -519,7 +566,7 @@ fn process_non_utf8_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) ->
         if byte == NL {
             *ctx.last_space = None;
             emit_output(ctx)?;
-            break;
+            continue;
         }
 
         if *ctx.col_count >= ctx.width {
@@ -539,7 +586,7 @@ fn process_non_utf8_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) ->
                 } else {
                     None
                 };
-                ctx.output.push(byte);
+                push_byte(ctx, byte)?;
                 continue;
             }
             0x08 => *ctx.col_count = ctx.col_count.saturating_sub(1),
@@ -550,7 +597,46 @@ fn process_non_utf8_line<W: Write>(line: &[u8], ctx: &mut FoldContext<'_, W>) ->
             _ => *ctx.col_count = ctx.col_count.saturating_add(1),
         }
 
-        ctx.output.push(byte);
+        push_byte(ctx, byte)?;
+    }
+
+    Ok(())
+}
+
+/// Process buffered bytes, emitting output for valid UTF-8 prefixes and
+/// deferring incomplete sequences until more input arrives.
+///
+/// If the buffer contains invalid UTF-8, it is handled in non-UTF-8 mode and
+/// the buffer is fully consumed.
+fn process_pending_chunk<W: Write>(
+    pending: &mut Vec<u8>,
+    ctx: &mut FoldContext<'_, W>,
+) -> UResult<()> {
+    while !pending.is_empty() {
+        match std::str::from_utf8(pending) {
+            Ok(valid) => {
+                process_utf8_line(valid, ctx)?;
+                pending.clear();
+                break;
+            }
+            Err(err) => {
+                if err.error_len().is_some() {
+                    let res = process_non_utf8_line(pending, ctx);
+                    pending.clear();
+                    res?;
+                    break;
+                }
+
+                let valid_up_to = err.valid_up_to();
+                if valid_up_to == 0 {
+                    break;
+                }
+
+                let valid = std::str::from_utf8(&pending[..valid_up_to]).expect("valid prefix");
+                process_utf8_line(valid, ctx)?;
+                pending.drain(..valid_up_to);
+            }
+        }
     }
 
     Ok(())
@@ -572,20 +658,12 @@ fn fold_file<T: Read, W: Write>(
     mode: WidthMode,
     writer: &mut W,
 ) -> UResult<()> {
-    let mut line = Vec::new();
     let mut output = Vec::new();
     let mut col_count = 0;
     let mut last_space = None;
+    let mut pending = Vec::new();
 
-    loop {
-        if file
-            .read_until(NL, &mut line)
-            .map_err_context(|| translate!("fold-error-readline"))?
-            == 0
-        {
-            break;
-        }
-
+    {
         let mut ctx = FoldContext {
             spaces,
             width,
@@ -596,17 +674,32 @@ fn fold_file<T: Read, W: Write>(
             last_space: &mut last_space,
         };
 
-        match std::str::from_utf8(&line) {
-            Ok(s) => process_utf8_line(s, &mut ctx)?,
-            Err(_) => process_non_utf8_line(&line, &mut ctx)?,
+        loop {
+            let buffer = file
+                .fill_buf()
+                .map_err_context(|| translate!("fold-error-readline"))?;
+            if buffer.is_empty() {
+                break;
+            }
+            pending.extend_from_slice(buffer);
+            let consumed = buffer.len();
+            file.consume(consumed);
+
+            process_pending_chunk(&mut pending, &mut ctx)?;
         }
 
-        line.clear();
-    }
+        if !pending.is_empty() {
+            match std::str::from_utf8(&pending) {
+                Ok(s) => process_utf8_line(s, &mut ctx)?,
+                Err(_) => process_non_utf8_line(&pending, &mut ctx)?,
+            }
+            pending.clear();
+        }
 
-    if !output.is_empty() {
-        writer.write_all(&output)?;
-        output.clear();
+        if !ctx.output.is_empty() {
+            ctx.writer.write_all(ctx.output)?;
+            ctx.output.clear();
+        }
     }
 
     Ok(())
diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs
index 9497044c910..1fe466ba5ad 100644
--- a/tests/by-util/test_fold.rs
+++ b/tests/by-util/test_fold.rs
@@ -4,7 +4,11 @@
 // file that was distributed with this source code.
 // spell-checker:ignore fullwidth
 
+use bytecount::count;
+use unicode_width::UnicodeWidthChar;
 use uutests::new_ucmd;
+use uutests::util::TestScenario;
+use uutests::util_name;
 
 #[test]
 fn test_invalid_arg() {
@@ -61,6 +65,301 @@ fn test_wide_characters_with_characters_option() {
         .stdout_is("\u{B250}\u{B250}\u{B250}\n");
 }
 
+#[test]
+fn test_multiple_wide_characters_in_column_mode() {
+    // Expected layout under `-w 10`: ten rows of five U+FF1A each.
+    let wide = '\u{FF1A}';
+    let input = format!("{}\n", wide.to_string().repeat(50));
+
+    let mut expected = String::new();
+    for _row in 0..10 {
+        for _col in 0..5 {
+            expected.push(wide);
+        }
+        expected.push('\n');
+    }
+
+    new_ucmd!()
+        .args(&["-w", "10"])
+        .pipe_in(input)
+        .succeeds()
+        .stdout_is(expected);
+}
+
+#[test]
+fn test_multiple_wide_characters_in_character_mode() {
+    // Expected layout under `--characters -w 10`: five rows of ten U+FF1A each.
+    let wide = '\u{FF1A}';
+    let input = format!("{}\n", wide.to_string().repeat(50));
+
+    let mut expected = String::new();
+    for _row in 0..5 {
+        for _col in 0..10 {
+            expected.push(wide);
+        }
+        expected.push('\n');
+    }
+
+    new_ucmd!()
+        .args(&["--characters", "-w", "10"])
+        .pipe_in(input)
+        .succeeds()
+        .stdout_is(expected);
+}
+
+#[test]
+fn test_unicode_on_reader_buffer_boundary_in_character_mode() {
+    // The ASCII padding puts the first byte of the multi-byte U+B250 at offset
+    // `capacity - 1`, the last slot of a default-capacity `BufReader` buffer.
+    let padding = buf_reader_capacity().saturating_sub(1);
+    assert!(padding > 0, "BufReader capacity must be greater than 1");
+
+    let input = format!("{}\u{B250}{}\n", "a".repeat(padding), "a".repeat(100));
+
+    // Only the last four output lines are compared against the reference.
+    let reference = fold_characters_reference(&input, 80);
+    let expected_tail = tail_inclusive(&reference, 4);
+
+    let result = new_ucmd!().arg("--characters").pipe_in(input).succeeds();
+    let actual_tail = tail_inclusive(result.stdout_str(), 4);
+
+    assert_eq!(actual_tail, expected_tail);
+}
+
+#[test]
+fn test_fold_preserves_invalid_utf8_sequences() {
+    // Bytes that are not valid UTF-8 must come out exactly as they went in.
+    let payload: &[u8] = b"\xC3|\xED\xBA\xAD|\x00|\x89|\xED\xA6\xBF\xED\xBF\xBF\n";
+    new_ucmd!()
+        .pipe_in(payload.to_vec())
+        .succeeds()
+        .stdout_is_bytes(payload);
+}
+
+#[test]
+fn test_fold_preserves_incomplete_utf8_at_eof() {
+    // 0xC3 opens a two-byte sequence that never completes before EOF.
+    let lone_lead: &[u8] = b"\xC3";
+    new_ucmd!()
+        .pipe_in(lone_lead.to_vec())
+        .succeeds()
+        .stdout_is_bytes(lone_lead);
+}
+
+#[test]
+fn test_zero_width_bytes_in_column_mode() {
+    // Default (column) mode is expected to pass a run of NUL bytes through
+    // without inserting any line breaks.
+    let nul_run = vec![0u8; io_buf_size_times_two()];
+    new_ucmd!()
+        .pipe_in(nul_run.clone())
+        .succeeds()
+        .stdout_is_bytes(nul_run);
+}
+
+#[test]
+fn test_zero_width_bytes_in_character_mode() {
+    // With `--characters` the output must match the byte-wise reference
+    // folded at 80 (no `-w` is passed to the command).
+    let nul_run = vec![0u8; io_buf_size_times_two()];
+    let folded = fold_characters_reference_bytes(&nul_run, 80);
+    new_ucmd!()
+        .args(&["--characters"])
+        .pipe_in(nul_run)
+        .succeeds()
+        .stdout_is_bytes(folded);
+}
+
+#[test]
+fn test_zero_width_spaces_in_column_mode() {
+    // A run of U+200B (zero width space) is expected to pass through column
+    // mode unchanged, with no line breaks added.
+    let zwsp_run = "\u{200B}".repeat(io_buf_size_times_two());
+    new_ucmd!()
+        .pipe_in(zwsp_run.clone())
+        .succeeds()
+        .stdout_is(&zwsp_run);
+}
+
+#[test]
+fn test_zero_width_spaces_in_character_mode() {
+    // With `--characters` every U+200B counts as one character, so the
+    // output must match the reference folded at 80 (no `-w` is passed).
+    let zwsp_run = "\u{200B}".repeat(io_buf_size_times_two());
+    let folded = fold_characters_reference(&zwsp_run, 80);
+    new_ucmd!()
+        .args(&["--characters"])
+        .pipe_in(zwsp_run)
+        .succeeds()
+        .stdout_is(&folded);
+}
+
+#[test]
+fn test_zero_width_bytes_from_file() {
+    let fixture = "zeros.bin";
+    let nul_run = vec![0u8; io_buf_size_times_two()];
+    let ts = TestScenario::new(util_name!());
+    ts.fixtures.write_bytes(fixture, &nul_run);
+
+    // Column mode: the file contents come back untouched.
+    ts.ucmd().arg(fixture).succeeds().stdout_is_bytes(&nul_run);
+
+    // Character mode: output matches the byte-wise reference folded at 80.
+    let folded = fold_characters_reference_bytes(&nul_run, 80);
+    ts.ucmd()
+        .args(&["--characters", fixture])
+        .succeeds()
+        .stdout_is_bytes(folded);
+}
+
+#[test]
+fn test_zero_width_spaces_from_file() {
+    let fixture = "zero-width.txt";
+    let zwsp_run = "\u{200B}".repeat(io_buf_size_times_two());
+    let ts = TestScenario::new(util_name!());
+    ts.fixtures.write(fixture, &zwsp_run);
+
+    // Column mode: the file contents come back untouched.
+    ts.ucmd().arg(fixture).succeeds().stdout_is(&zwsp_run);
+
+    // Character mode: output matches the reference folded at 80.
+    let folded = fold_characters_reference(&zwsp_run, 80);
+    ts.ucmd()
+        .args(&["--characters", fixture])
+        .succeeds()
+        .stdout_is(&folded);
+}
+
+#[test]
+fn test_zero_width_data_line_counts() {
+    let len = io_buf_size_times_two();
+
+    // NUL bytes: no breaks in column mode, one break per 80 with `--characters`.
+    let nul_run = vec![0u8; len];
+    let nul_columns = new_ucmd!().pipe_in(nul_run.clone()).succeeds();
+    assert_eq!(
+        newline_count(nul_columns.stdout()),
+        0,
+        "fold should not wrap zero-width bytes in column mode",
+    );
+    let nul_characters = new_ucmd!()
+        .args(&["--characters"])
+        .pipe_in(nul_run)
+        .succeeds();
+    assert_eq!(
+        newline_count(nul_characters.stdout()),
+        len / 80,
+        "fold --characters should wrap zero-width bytes every 80 bytes",
+    );
+
+    // Skip the U+200B half when `unicode-width` does not report it as zero-width.
+    if UnicodeWidthChar::width('\u{200B}') != Some(0) {
+        eprintln!("skip zero width space checks because width != 0");
+        return;
+    }
+
+    let zwsp_run = "\u{200B}".repeat(len);
+    let zwsp_columns = new_ucmd!().pipe_in(zwsp_run.clone()).succeeds();
+    assert_eq!(
+        newline_count(zwsp_columns.stdout()),
+        0,
+        "fold should keep zero-width spaces on a single line in column mode",
+    );
+    let zwsp_characters = new_ucmd!()
+        .args(&["--characters"])
+        .pipe_in(zwsp_run)
+        .succeeds();
+    assert_eq!(
+        newline_count(zwsp_characters.stdout()),
+        len / 80,
+        "fold --characters should wrap zero-width spaces every 80 characters",
+    );
+}
+
+#[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "netbsd"))]
+#[test]
+fn test_fold_reports_no_space_left_on_dev_full() {
+    use std::fs::OpenOptions;
+    use std::process::Stdio;
+
+    // One run per filler byte: a newline, a NUL and a lone UTF-8 lead byte.
+    for filler in [b'\n', b'\0', 0xC3u8] {
+        let sink = OpenOptions::new()
+            .write(true)
+            .open("/dev/full")
+            .expect("/dev/full must exist on supported targets");
+        new_ucmd!()
+            .pipe_in(vec![filler; 1024])
+            .set_stdout(Stdio::from(sink))
+            .fails()
+            .stderr_contains("No space left");
+    }
+}
+
+fn buf_reader_capacity() -> usize {
+    std::io::BufReader::new(std::io::empty()).capacity()
+}
+
+fn io_buf_size_times_two() -> usize {
+    // Twice the default `BufReader` capacity; panics if that overflows `usize`.
+    let doubled = buf_reader_capacity().checked_mul(2);
+    doubled.expect("BufReader capacity overflow")
+}
+
+fn fold_characters_reference(input: &str, width: usize) -> String {
+    // Reference model for `--characters`: each non-newline scalar counts as one
+    // position, and a break is inserted before the scalar that would exceed `width`.
+    let mut folded = String::with_capacity(input.len());
+    let mut used = 0usize;
+
+    for scalar in input.chars() {
+        match scalar {
+            '\n' => used = 0,
+            _ => {
+                if used >= width {
+                    folded.push('\n');
+                    used = 0;
+                }
+                used += 1;
+            }
+        }
+        folded.push(scalar);
+    }
+
+    folded
+}
+
+fn fold_characters_reference_bytes(input: &[u8], width: usize) -> Vec<u8> {
+    // Every full `width`-byte chunk is followed by a newline (including a
+    // final full chunk); a shorter trailing chunk is copied without one.
+    let mut folded = Vec::with_capacity(input.len() + input.len() / width + 1);
+    let mut full_chunks = input.chunks_exact(width);
+    for chunk in full_chunks.by_ref() {
+        folded.extend_from_slice(chunk);
+        folded.push(b'\n');
+    }
+    folded.extend_from_slice(full_chunks.remainder());
+    folded
+}
+
+fn newline_count(bytes: &[u8]) -> usize {
+    count(bytes, b'\n') // `bytecount::count`: number of `\n` bytes in `bytes`
+}
+
+fn tail_inclusive(text: &str, lines: usize) -> String {
+    // Returns the last `lines` segments of `text`, where a segment runs up to
+    // and including its `\n` (the final segment may lack one). Asking for
+    // zero lines, or passing empty text, yields an empty string.
+    let segments: Vec<&str> = text.split_inclusive('\n').collect();
+
+    // Skipping everything but the tail also covers both degenerate cases:
+    // `lines == 0` skips every segment and empty text has none to begin with.
+    let skipped = segments.len().saturating_sub(lines);
+    let mut tail = String::new();
+    tail.extend(segments.iter().skip(skipped).copied());
+    tail
+}
+
 #[test]
 fn test_should_preserve_empty_line_without_final_newline() {
     new_ucmd!()