Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 40 additions & 17 deletions core/runtime/src/text/encodings.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub(crate) mod utf8 {
use boa_engine::JsString;
use boa_engine::string::CodePoint;
use boa_engine::{JsResult, JsString, js_error};

pub(crate) fn encode(input: &JsString) -> Vec<u8> {
input
Expand All @@ -12,15 +12,21 @@ pub(crate) mod utf8 {
.collect()
}

pub(crate) fn decode(input: &[u8]) -> JsString {
let string = String::from_utf8_lossy(input);
JsString::from(string.as_ref())
pub(crate) fn decode(input: &[u8], fatal: bool) -> JsResult<JsString> {
if fatal {
let s = std::str::from_utf8(input)
.map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?;
Ok(JsString::from(s))
} else {
let string = String::from_utf8_lossy(input);
Ok(JsString::from(string.as_ref()))
}
}
}

pub(crate) mod utf16le {
use boa_engine::string::JsStrVariant;
use boa_engine::{JsString, js_string};
use boa_engine::{JsResult, JsString, js_error, js_string};

pub(crate) fn encode(input: &JsString) -> Vec<u8> {
match input.as_str().variant() {
Expand All @@ -29,28 +35,37 @@ pub(crate) mod utf16le {
}
}

pub(crate) fn decode(mut input: &[u8]) -> JsString {
// After this point, input is of even length.
pub(crate) fn decode(mut input: &[u8], fatal: bool) -> JsResult<JsString> {
let dangling = if input.len().is_multiple_of(2) {
false
} else {
if fatal {
return Err(js_error!(TypeError: "The encoded data was not valid."));
}
input = &input[0..input.len() - 1];
true
};

let input: &[u16] = bytemuck::cast_slice(input);

if dangling {
JsString::from(&[JsString::from(input), js_string!("\u{FFFD}")])
if fatal {
let s = String::from_utf16(input)
.map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?;
Ok(JsString::from(s))
} else if dangling {
Ok(JsString::from(&[
JsString::from(input),
js_string!("\u{FFFD}"),
]))
} else {
JsString::from(input)
Ok(JsString::from(input))
}
}
}

pub(crate) mod utf16be {
use boa_engine::string::JsStrVariant;
use boa_engine::{JsString, js_string};
use boa_engine::{JsResult, JsString, js_error, js_string};

pub(crate) fn encode(input: &JsString) -> Vec<u8> {
match input.as_str().variant() {
Expand All @@ -59,28 +74,36 @@ pub(crate) mod utf16be {
}
}

pub(crate) fn decode(mut input: Vec<u8>) -> JsString {
pub(crate) fn decode(mut input: Vec<u8>, fatal: bool) -> JsResult<JsString> {
let mut input = input.as_mut_slice();
// After this point, input is of even length.
let dangling = if input.len().is_multiple_of(2) {
false
} else {
if fatal {
return Err(js_error!(TypeError: "The encoded data was not valid."));
}
let new_len = input.len() - 1;
input = &mut input[0..new_len];
true
};

let input: &mut [u16] = bytemuck::cast_slice_mut(input);

// Swap the bytes.
for b in &mut *input {
*b = b.swap_bytes();
}

if dangling {
JsString::from(&[JsString::from(&*input), js_string!("\u{FFFD}")])
if fatal {
let s = String::from_utf16(input)
.map_err(|_| js_error!(TypeError: "The encoded data was not valid."))?;
Ok(JsString::from(s))
} else if dangling {
Ok(JsString::from(&[
JsString::from(&*input),
js_string!("\u{FFFD}"),
]))
} else {
JsString::from(&*input)
Ok(JsString::from(&*input))
}
}
}
86 changes: 45 additions & 41 deletions core/runtime/src/text/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,61 +19,65 @@ mod encodings;
/// a specific character encoding, like `utf-8`.
///
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder
#[derive(Debug, Clone, JsData, Trace, Finalize)]
pub enum TextDecoder {
    /// Decode bytes encoded as UTF-8 into strings.
    Utf8 {
        /// Whether `decode` returns a `TypeError` on malformed input instead of
        /// decoding it leniently.
        fatal: bool,
    },
    /// Decode bytes encoded as UTF-16 (little endian) into strings.
    Utf16Le {
        /// Whether `decode` returns a `TypeError` on malformed input instead of
        /// decoding it leniently.
        fatal: bool,
    },
    /// Decode bytes encoded as UTF-16 (big endian) into strings.
    Utf16Be {
        /// Whether `decode` returns a `TypeError` on malformed input instead of
        /// decoding it leniently.
        fatal: bool,
    },
}

impl Default for TextDecoder {
    /// Returns a UTF-8 decoder with `fatal` turned off.
    ///
    /// Implemented by hand because `#[default]` can only be placed on unit variants.
    fn default() -> Self {
        let fatal = false;
        Self::Utf8 { fatal }
    }
}

#[boa_class]
impl TextDecoder {
/// The [`TextDecoder()`][mdn] constructor returns a new `TextDecoder` object.
///
/// # Errors
/// This will return an error if the encoding or options are invalid or unsupported.
///
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/TextDecoder
#[boa(constructor)]
pub fn constructor(encoding: Option<JsString>, _options: Option<JsObject>) -> JsResult<Self> {
let Some(encoding) = encoding else {
return Ok(Self::default());
pub fn constructor(encoding: Option<JsString>, options: Option<JsObject>) -> JsResult<Self> {
let encoding = encoding.as_ref().map(|e| e.to_std_string_lossy());
let fatal = if let Some(options) = options {
options
.get(js_string!("fatal"), &mut Context::default())?
.to_boolean()
} else {
false
};

match encoding.to_std_string_lossy().as_str() {
"utf-8" => Ok(Self::Utf8),
match encoding.as_deref() {
None | Some("utf-8") => Ok(Self::Utf8 { fatal }),
// Default encoding is Little Endian.
"utf-16" | "utf-16le" => Ok(Self::Utf16Le),
"utf-16be" => Ok(Self::Utf16Be),
e => Err(js_error!(RangeError: "The given encoding '{}' is not supported.", e)),
Some("utf-16" | "utf-16le") => Ok(Self::Utf16Le { fatal }),
Some("utf-16be") => Ok(Self::Utf16Be { fatal }),
Some(e) => Err(js_error!(RangeError: "The given encoding '{}' is not supported.", e)),
}
}

/// The [`TextDecoder.encoding`][mdn] read-only property returns the name of the
/// character encoding this decoder was created for.
///
/// The name depends only on the variant; the `fatal` flag does not affect it.
///
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/encoding
#[boa(getter)]
#[must_use]
pub fn encoding(&self) -> JsString {
    match *self {
        Self::Utf16Be { .. } => js_string!("utf-16be"),
        Self::Utf16Le { .. } => js_string!("utf-16le"),
        Self::Utf8 { .. } => js_string!("utf-8"),
    }
}

/// The [`TextDecoder.fatal`][mdn] read-only property returns the `fatal` flag this
/// decoder was constructed with.
///
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/fatal
#[boa(getter)]
#[must_use]
pub fn fatal(&self) -> bool {
    // Every variant carries the same flag, so one or-pattern covers the whole enum.
    match *self {
        Self::Utf8 { fatal } | Self::Utf16Le { fatal } | Self::Utf16Be { fatal } => fatal,
    }
}

/// The [`TextDecoder.decode()`][mdn] method returns a string containing text decoded from the
/// buffer passed as a parameter.
///
/// # Errors
/// Any error that arises during decoding the specific encoding.
///
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder/decode
pub fn decode(&self, buffer: JsValue, context: &mut Context) -> JsResult<JsString> {
// `buffer` can be an `ArrayBuffer`, a `TypedArray` or a `DataView`.
let bytes = if let Ok(array_buffer) = JsArrayBuffer::try_from_js(&buffer, context) {
Expand All @@ -90,11 +94,11 @@ impl TextDecoder {
};

let buffer = bytes.iter(context).collect::<Vec<u8>>();
Ok(match self {
Self::Utf8 => encodings::utf8::decode(&buffer),
Self::Utf16Le => encodings::utf16le::decode(&buffer),
Self::Utf16Be => encodings::utf16be::decode(buffer),
})
match self {
Self::Utf8 { fatal } => encodings::utf8::decode(&buffer, *fatal),
Self::Utf16Le { fatal } => encodings::utf16le::decode(&buffer, *fatal),
Self::Utf16Be { fatal } => encodings::utf16be::decode(buffer, *fatal),
}
}
}

Expand Down
90 changes: 77 additions & 13 deletions core/runtime/src/text/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ use crate::test::{TestAction, run_test_actions_with};
use crate::text;
use boa_engine::object::builtins::JsUint8Array;
use boa_engine::property::Attribute;
use boa_engine::{Context, JsString, js_str, js_string};
use boa_engine::{Context, JsString, JsValue, js_str, js_string};
use indoc::indoc;
use test_case::test_case;

#[test]
fn encoder_js() {
Expand Down Expand Up @@ -125,25 +124,20 @@ fn decoder_js_invalid() {
);
}

#[test_case("utf-8")]
#[test_case("utf-16")]
#[test_case("utf-16le")]
#[test_case("utf-16be")]
fn roundtrip(encoding: &'static str) {
#[test]
fn roundtrip() {
let context = &mut Context::default();
text::register(None, context).unwrap();

run_test_actions_with(
[
TestAction::run(format!(
r#"
const encoder = new TextEncoder({encoding:?});
const decoder = new TextDecoder({encoding:?});
TestAction::run(indoc! {r#"
const encoder = new TextEncoder();
const decoder = new TextDecoder();
const text = "Hello, World!";
const encoded = encoder.encode(text);
decoded = decoder.decode(encoded);
"#
)),
"#}),
TestAction::inspect_context(|context| {
let decoded = context
.global_object()
Expand All @@ -155,3 +149,73 @@ fn roundtrip(encoding: &'static str) {
context,
);
}

/// A fatal UTF-8 decoder must throw a `TypeError` when given an invalid byte.
#[test]
fn decoder_fatal() {
    let context = &mut Context::default();
    text::register(None, context).unwrap();

    // The script stores whatever `decode` throws in the global `error`.
    let script = TestAction::run(indoc! {r#"
        const decoder = new TextDecoder("utf-8", { fatal: true });
        try {
            decoder.decode(new Uint8Array([0xFF]));
        } catch (e) {
            error = e;
        }
    "#});

    let check = TestAction::inspect_context(|context| {
        let error: JsValue = context
            .global_object()
            .get(js_str!("error"), context)
            .unwrap();
        assert!(error.is_object());

        let message = error.to_string(context).unwrap().to_std_string_lossy();
        assert_eq!(message, "TypeError: The encoded data was not valid.");
    });

    run_test_actions_with([script, check], context);
}

/// A fatal UTF-16LE decoder must throw a `TypeError` both for an odd-length buffer and
/// for an even-length buffer that holds a lone surrogate.
#[test]
fn decoder_fatal_utf16() {
    let context = &mut Context::default();
    text::register(None, context).unwrap();

    run_test_actions_with(
        [
            // Each thrown value goes into its own global so the two cases are
            // checked independently.
            TestAction::run(indoc! {r#"
                const decoder = new TextDecoder("utf-16le", { fatal: true });
                try {
                    decoder.decode(new Uint8Array([0x00]));
                } catch (e) {
                    error = e;
                }
                try {
                    decoder.decode(new Uint8Array([0x00, 0xD8]));
                } catch (e) {
                    surrogateError = e;
                }
            "#}),
            TestAction::inspect_context(|context| {
                // `error`: truncated code unit. `surrogateError`: 0xD800 with no
                // following low surrogate.
                for name in ["error", "surrogateError"] {
                    let error: JsValue = context
                        .global_object()
                        .get(JsString::from(name), context)
                        .unwrap();
                    assert!(error.is_object(), "`{name}` was not set: decode did not throw");
                    assert_eq!(
                        error.to_string(context).unwrap().to_std_string_lossy(),
                        "TypeError: The encoded data was not valid."
                    );
                }
            }),
        ],
        context,
    );
}
Loading