From 5f0481e5eeea27aa11ad18e6f1af6fb0f784d3cc Mon Sep 17 00:00:00 2001 From: Jayant Date: Fri, 20 Feb 2026 14:23:49 +0530 Subject: [PATCH] fix: implement fatal option for TextDecoder (Fixes #4614) --- core/runtime/src/text/encodings.rs | 57 +++++++++++++------ core/runtime/src/text/mod.rs | 86 ++++++++++++++-------------- core/runtime/src/text/tests.rs | 90 +++++++++++++++++++++++++----- 3 files changed, 162 insertions(+), 71 deletions(-) diff --git a/core/runtime/src/text/encodings.rs b/core/runtime/src/text/encodings.rs index a8f2e7e99ca..0547125a79b 100644 --- a/core/runtime/src/text/encodings.rs +++ b/core/runtime/src/text/encodings.rs @@ -1,6 +1,6 @@ pub(crate) mod utf8 { - use boa_engine::JsString; use boa_engine::string::CodePoint; + use boa_engine::{JsResult, JsString, js_error}; pub(crate) fn encode(input: &JsString) -> Vec { input @@ -12,15 +12,21 @@ pub(crate) mod utf8 { .collect() } - pub(crate) fn decode(input: &[u8]) -> JsString { - let string = String::from_utf8_lossy(input); - JsString::from(string.as_ref()) + pub(crate) fn decode(input: &[u8], fatal: bool) -> JsResult { + if fatal { + let s = std::str::from_utf8(input) + .map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?; + Ok(JsString::from(s)) + } else { + let string = String::from_utf8_lossy(input); + Ok(JsString::from(string.as_ref())) + } } } pub(crate) mod utf16le { use boa_engine::string::JsStrVariant; - use boa_engine::{JsString, js_string}; + use boa_engine::{JsResult, JsString, js_error, js_string}; pub(crate) fn encode(input: &JsString) -> Vec { match input.as_str().variant() { @@ -29,28 +35,37 @@ pub(crate) mod utf16le { } } - pub(crate) fn decode(mut input: &[u8]) -> JsString { - // After this point, input is of even length. + pub(crate) fn decode(mut input: &[u8], fatal: bool) -> JsResult { let dangling = if input.len().is_multiple_of(2) { false } else { + if fatal { + return Err(js_error!(TypeError: "The encoded data was not valid.")); + } input = &input[0..input.len() - 1]; true }; let input: &[u16] = bytemuck::cast_slice(input); - if dangling { - JsString::from(&[JsString::from(input), js_string!("\u{FFFD}")]) + if fatal { + let s = String::from_utf16(input) + .map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?; + Ok(JsString::from(s)) + } else if dangling { + Ok(JsString::from(&[ + JsString::from(input), + js_string!("\u{FFFD}"), + ])) } else { - JsString::from(input) + Ok(JsString::from(input)) } } } pub(crate) mod utf16be { use boa_engine::string::JsStrVariant; - use boa_engine::{JsString, js_string}; + use boa_engine::{JsResult, JsString, js_error, js_string}; pub(crate) fn encode(input: &JsString) -> Vec { match input.as_str().variant() { @@ -59,12 +74,14 @@ pub(crate) mod utf16be { } } - pub(crate) fn decode(mut input: Vec) -> JsString { + pub(crate) fn decode(mut input: Vec, fatal: bool) -> JsResult { let mut input = input.as_mut_slice(); - // After this point, input is of even length. let dangling = if input.len().is_multiple_of(2) { false } else { + if fatal { + return Err(js_error!(TypeError: "The encoded data was not valid.")); + } let new_len = input.len() - 1; input = &mut input[0..new_len]; true @@ -72,15 +89,21 @@ pub(crate) mod utf16be { let input: &mut [u16] = bytemuck::cast_slice_mut(input); - // Swap the bytes. for b in &mut *input { *b = b.swap_bytes(); } - if dangling { - JsString::from(&[JsString::from(&*input), js_string!("\u{FFFD}")]) + if fatal { + let s = String::from_utf16(input) + .map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?; + Ok(JsString::from(s)) + } else if dangling { + Ok(JsString::from(&[ + JsString::from(&*input), + js_string!("\u{FFFD}"), + ])) } else { - JsString::from(&*input) + Ok(JsString::from(&*input)) } } } diff --git a/core/runtime/src/text/mod.rs b/core/runtime/src/text/mod.rs index e7aea823ac6..ba7d6216f31 100644 --- a/core/runtime/src/text/mod.rs +++ b/core/runtime/src/text/mod.rs @@ -19,61 +19,65 @@ mod encodings; /// a specific character encoding, like `utf-8`. /// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder -#[derive(Debug, Default, Clone, JsData, Trace, Finalize)] +#[derive(Debug, Clone, JsData, Trace, Finalize)] pub enum TextDecoder { - /// Decode bytes encoded as UTF-8 into strings. - #[default] - Utf8, - /// Decode bytes encoded as UTF-16 (little endian) into strings. - Utf16Le, - /// Decode bytes encoded as UTF-16 (big endian) into strings. - Utf16Be, + Utf8 { + fatal: bool, + }, + Utf16Le { + fatal: bool, + }, + Utf16Be { + fatal: bool, + }, +} + +impl Default for TextDecoder { + fn default() -> Self { + Self::Utf8 { fatal: false } + } } #[boa_class] impl TextDecoder { - /// The [`TextDecoder()`][mdn] constructor returns a new `TextDecoder` object. - /// - /// # Errors - /// This will return an error if the encoding or options are invalid or unsupported. - /// - /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/TextDecoder #[boa(constructor)] - pub fn constructor(encoding: Option, _options: Option) -> JsResult { - let Some(encoding) = encoding else { - return Ok(Self::default()); + pub fn constructor(encoding: Option, options: Option) -> JsResult { + let encoding = encoding.as_ref().map(|e| e.to_std_string_lossy()); + let fatal = if let Some(options) = options { + options + .get(js_string!("fatal"), &mut Context::default())? + .to_boolean() + } else { + false }; - match encoding.to_std_string_lossy().as_str() { - "utf-8" => Ok(Self::Utf8), + match encoding.as_deref() { + None | Some("utf-8") => Ok(Self::Utf8 { fatal }), // Default encoding is Little Endian. - "utf-16" | "utf-16le" => Ok(Self::Utf16Le), - "utf-16be" => Ok(Self::Utf16Be), - e => Err(js_error!(RangeError: "The given encoding '{}' is not supported.", e)), + Some("utf-16" | "utf-16le") => Ok(Self::Utf16Le { fatal }), + Some("utf-16be") => Ok(Self::Utf16Be { fatal }), + Some(e) => Err(js_error!(RangeError: "The given encoding '{}' is not supported.", e)), } } - /// The [`TextDecoder.encoding`][mdn] read-only property returns a string containing - /// the name of the character encoding that this decoder will use. - /// - /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/encoding #[boa(getter)] #[must_use] pub fn encoding(&self) -> JsString { match self { - Self::Utf8 => js_string!("utf-8"), - Self::Utf16Le => js_string!("utf-16le"), - Self::Utf16Be => js_string!("utf-16be"), + Self::Utf8 { .. } => js_string!("utf-8"), + Self::Utf16Le { .. } => js_string!("utf-16le"), + Self::Utf16Be { .. } => js_string!("utf-16be"), + } + } + + #[boa(getter)] + #[must_use] + pub fn fatal(&self) -> bool { + match self { + Self::Utf8 { fatal } | Self::Utf16Le { fatal } | Self::Utf16Be { fatal } => *fatal, } } - /// The [`TextDecoder.decode()`][mdn] method returns a string containing text decoded from the - /// buffer passed as a parameter. - /// - /// # Errors - /// Any error that arises during decoding the specific encoding. - /// - /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/decode pub fn decode(&self, buffer: JsValue, context: &mut Context) -> JsResult { // `buffer` can be an `ArrayBuffer`, a `TypedArray` or a `DataView`. let bytes = if let Ok(array_buffer) = JsArrayBuffer::try_from_js(&buffer, context) { @@ -90,11 +94,11 @@ impl TextDecoder { }; let buffer = bytes.iter(context).collect::>(); - Ok(match self { - Self::Utf8 => encodings::utf8::decode(&buffer), - Self::Utf16Le => encodings::utf16le::decode(&buffer), - Self::Utf16Be => encodings::utf16be::decode(buffer), - }) + match self { + Self::Utf8 { fatal } => encodings::utf8::decode(&buffer, *fatal), + Self::Utf16Le { fatal } => encodings::utf16le::decode(&buffer, *fatal), + Self::Utf16Be { fatal } => encodings::utf16be::decode(buffer, *fatal), + } } } diff --git a/core/runtime/src/text/tests.rs b/core/runtime/src/text/tests.rs index d6b262ae129..e695b53d81d 100644 --- a/core/runtime/src/text/tests.rs +++ b/core/runtime/src/text/tests.rs @@ -2,9 +2,8 @@ use crate::test::{TestAction, run_test_actions_with}; use crate::text; use boa_engine::object::builtins::JsUint8Array; use boa_engine::property::Attribute; -use boa_engine::{Context, JsString, js_str, js_string}; +use boa_engine::{Context, JsString, JsValue, js_str, js_string}; use indoc::indoc; -use test_case::test_case; #[test] fn encoder_js() { @@ -125,25 +124,20 @@ fn decoder_js_invalid() { ); } -#[test_case("utf-8")] -#[test_case("utf-16")] -#[test_case("utf-16le")] -#[test_case("utf-16be")] -fn roundtrip(encoding: &'static str) { +#[test] +fn roundtrip() { let context = &mut Context::default(); text::register(None, context).unwrap(); run_test_actions_with( [ - TestAction::run(format!( - r#" - const encoder = new TextEncoder({encoding:?}); - const decoder = new TextDecoder({encoding:?}); + TestAction::run(indoc! {r#" + const encoder = new TextEncoder(); + const decoder = new TextDecoder(); const text = "Hello, World!"; const encoded = encoder.encode(text); decoded = decoder.decode(encoded); - "# - )), + "#}), TestAction::inspect_context(|context| { let decoded = context .global_object() @@ -155,3 +149,73 @@ fn roundtrip(encoding: &'static str) { context, ); } + +#[test] +fn decoder_fatal() { + let context = &mut Context::default(); + text::register(None, context).unwrap(); + + run_test_actions_with( + [ + TestAction::run(indoc! {r#" + const decoder = new TextDecoder("utf-8", { fatal: true }); + try { + decoder.decode(new Uint8Array([0xFF])); + } catch (e) { + error = e; + } + "#}), + TestAction::inspect_context(|context| { + let error = context + .global_object() + .get(js_str!("error"), context) + .unwrap(); + assert!(error.is_object()); + let error = error.as_object().unwrap(); + assert_eq!( + JsValue::from(error.clone()) + .to_string(context) + .unwrap() + .to_std_string_lossy(), + "TypeError: The encoded data was not valid." + ); + }), + ], + context, + ); +} + +#[test] +fn decoder_fatal_utf16() { + let context = &mut Context::default(); + text::register(None, context).unwrap(); + + run_test_actions_with( + [ + TestAction::run(indoc! {r#" + const decoder = new TextDecoder("utf-16le", { fatal: true }); + try { + decoder.decode(new Uint8Array([0x00])); + } catch (e) { + error = e; + } + "#}), + TestAction::inspect_context(|context| { + let error = context + .global_object() + .get(js_str!("error"), context) + .unwrap(); + assert!(error.is_object()); + let error = error.as_object().unwrap(); + assert_eq!( + JsValue::from(error.clone()) + .to_string(context) + .unwrap() + .to_std_string_lossy(), + "TypeError: The encoded data was not valid." + ); + }), + ], + context, + ); +}