diff --git a/config.cmake.in b/config.cmake.in
index 576ed78de7..4fbda765ec 100644
--- a/config.cmake.in
+++ b/config.cmake.in
@@ -73,6 +73,7 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
   elseif(component STREQUAL "email")
     include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake")
     include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake")
+    include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
     include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_email.cmake")
   elseif(component STREQUAL "uri")
     include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake")
diff --git a/src/core/email/CMakeLists.txt b/src/core/email/CMakeLists.txt
index 7330454c26..22828bcf36 100644
--- a/src/core/email/CMakeLists.txt
+++ b/src/core/email/CMakeLists.txt
@@ -1,5 +1,5 @@
 sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME email
-  SOURCES email.cc)
+  SOURCES email.cc helpers.h)
 
 if(SOURCEMETA_CORE_INSTALL)
   sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME email)
@@ -9,3 +9,5 @@ target_link_libraries(sourcemeta_core_email
   PRIVATE sourcemeta::core::dns)
 target_link_libraries(sourcemeta_core_email
   PRIVATE sourcemeta::core::ip)
+target_link_libraries(sourcemeta_core_email
+  PRIVATE sourcemeta::core::unicode)
diff --git a/src/core/email/email.cc b/src/core/email/email.cc
index 12a15472f7..c3eb899cfa 100644
--- a/src/core/email/email.cc
+++ b/src/core/email/email.cc
@@ -1,110 +1,15 @@
 #include <sourcemeta/core/email.h>
 
 #include <sourcemeta/core/dns.h>
-#include <sourcemeta/core/ip.h>
 
-namespace sourcemeta::core {
-
-// RFC 5321 §4.1.2: atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" /
-// "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" / "`" /
-// "{" / "|" / "}" / "~"
-static constexpr auto is_atext(const char character) -> bool {
-  switch (character) {
-    case '!':
-    case '#':
-    case '$':
-    case '%':
-    case '&':
-    case '\'':
-    case '*':
-    case '+':
-    case '-':
-    case '/':
-    case '=':
-    case '?':
-    case '^':
-    case '_':
-    case '`':
-    case '{':
-    case '|':
-    case '}':
-    case '~':
-      return true;
-    default:
-      return (character >= 'A' && character <= 'Z') ||
-             (character >= 'a' && character <= 'z') ||
-             (character >= '0' && character <= '9');
-  }
-}
-
-// RFC 5321 §4.1.2: qtextSMTP = %d32-33 / %d35-91 / %d93-126
-static constexpr auto is_qtext_smtp(const unsigned char character) -> bool {
-  return (character >= 32 && character <= 33) ||
-         (character >= 35 && character <= 91) ||
-         (character >= 93 && character <= 126);
-}
-
-// RFC 5321 §4.1.2: Let-dig = ALPHA / DIGIT
-static constexpr auto is_let_dig(const char character) -> bool {
-  return (character >= 'A' && character <= 'Z') ||
-         (character >= 'a' && character <= 'z') ||
-         (character >= '0' && character <= '9');
-}
-
-// RFC 5321 §4.1.3: dcontent = %d33-90 / %d94-126
-static constexpr auto is_dcontent(const unsigned char character) -> bool {
-  return (character >= 33 && character <= 90) ||
-         (character >= 94 && character <= 126);
-}
-
-// RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
-// RFC 5321 §4.1.3: Standardized-tag = Ldh-str
-static constexpr auto is_ldh_str(const std::string_view value) -> bool {
-  if (value.empty() || !is_let_dig(value.back())) {
-    return false;
-  }
-  for (std::string_view::size_type position{0}; position + 1 < value.size();
-       position += 1) {
-    const auto character{value[position]};
-    if (!is_let_dig(character) && character != '-') {
-      return false;
-    }
-  }
-  return true;
-}
-
-// RFC 5234 §2.3: ABNF literal strings are case-insensitive by default
-// RFC 5321 §4.1.3: IPv6-address-literal prefix is the literal "IPv6:"
-static constexpr auto matches_ipv6_tag(const std::string_view value) -> bool {
-  return value.size() >= 5 && (value[0] == 'I' || value[0] == 'i') &&
-         (value[1] == 'P' || value[1] == 'p') &&
-         (value[2] == 'v' || value[2] == 'V') && value[3] == '6' &&
-         value[4] == ':';
-}
+#include "helpers.h"
 
-// RFC 5321 §4.1.3: General-address-literal = Standardized-tag ":" 1*dcontent
-static constexpr auto is_general_address_literal(const std::string_view value)
-    -> bool {
-  const auto colon_position{value.find(':')};
-  if (colon_position == std::string_view::npos) {
-    return false;
-  }
-  if (!is_ldh_str(value.substr(0, colon_position))) {
-    return false;
-  }
-  const auto content{value.substr(colon_position + 1)};
-  if (content.empty()) {
-    return false;
-  }
-  for (const auto character : content) {
-    if (!is_dcontent(static_cast<unsigned char>(character))) {
-      return false;
-    }
-  }
-  return true;
-}
+namespace sourcemeta::core {
 
-auto is_email(const std::string_view value) -> bool {
+// RFC 5321 §4.1.2 Mailbox grammar. When AllowUtf8 is true, RFC 6531 §3.3
+// extends atext, qtextSMTP, and sub-domain with UTF8-non-ascii alternatives
+template <bool AllowUtf8>
+static auto is_mailbox(const std::string_view value) -> bool {
   if (value.empty()) {
     return false;
   }
@@ -126,11 +31,23 @@ auto is_email(const std::string_view value) -> bool {
           return false;
         }
         position += 1;
-      } else {
-        if (!is_qtext_smtp(static_cast<unsigned char>(value[position]))) {
+        continue;
+      }
+
+      if (is_qtext_smtp(static_cast<unsigned char>(value[position]))) {
+        position += 1;
+        continue;
+      }
+
+      if constexpr (AllowUtf8) {
+        // RFC 6531 §3.3: qtextSMTP =/ UTF8-non-ascii
+        const auto utf8_length{utf8_codepoint_length(value, position)};
+        if (utf8_length < 2) {
           return false;
         }
-        position += 1;
+        position += utf8_length;
+      } else {
+        return false;
       }
     }
     if (position >= value.size()) {
@@ -150,13 +67,29 @@ auto is_email(const std::string_view value) -> bool {
         }
         previous_was_dot = true;
         atom_started = false;
-      } else if (is_atext(character)) {
+        position += 1;
+        continue;
+      }
+
+      if (is_atext(character)) {
         previous_was_dot = false;
         atom_started = true;
+        position += 1;
+        continue;
+      }
+
+      if constexpr (AllowUtf8) {
+        // RFC 6531 §3.3: atext =/ UTF8-non-ascii
+        const auto utf8_length{utf8_codepoint_length(value, position)};
+        if (utf8_length < 2) {
+          return false;
+        }
+        previous_was_dot = false;
+        atom_started = true;
+        position += utf8_length;
       } else {
         return false;
       }
-      position += 1;
     }
     if (position == 0 || previous_was_dot) {
       return false;
@@ -177,32 +110,26 @@ auto is_email(const std::string_view value) -> bool {
 
   // RFC 5321 §4.1.3: address-literal = "[" ( IPv4 / IPv6 / General ) "]"
   if (!domain.empty() && domain.front() == '[') {
-    if (domain.back() != ']') {
-      return false;
-    }
-    // RFC 5321 §4.5.3.1.2: 255-octet cap on a domain "name or number"
-    if (domain.size() > 255) {
-      return false;
-    }
-    const auto inner{domain.substr(1, domain.size() - 2)};
-    // RFC 5321 §4.1.3: IPv6-address-literal = "IPv6:" IPv6-addr
-    if (matches_ipv6_tag(inner) && is_ipv6(inner.substr(5))) {
-      return true;
-    }
-    // RFC 5234 §3.2: ABNF alternatives are unordered. A failed IPv6 match
-    // falls through to IPv4 or General-address-literal.
-    // RFC 5321 §4.1.3: IPv4-address-literal = Snum 3("." Snum) has no ":",
-    // General-address-literal requires ":"
-    if (inner.find(':') == std::string_view::npos) {
-      return is_ipv4(inner);
-    }
-    return is_general_address_literal(inner);
+    return is_address_literal(domain);
   }
 
-  // RFC 5321 §4.1.2 Domain matches is_hostname (RFC 1123 §2.1) by
-  // grammar, by 63-octet label cap (RFC 1035 §2.3.4), and by
-  // 255-octet total cap (RFC 5321 §4.5.3.1.2)
-  return is_hostname(domain);
+  if constexpr (AllowUtf8) {
+    // RFC 6531 §3.3: sub-domain =/ U-label
+    return is_idn_domain(domain);
+  } else {
+    // RFC 5321 §4.1.2 Domain matches is_hostname (RFC 1123 §2.1) by
+    // grammar, by 63-octet label cap (RFC 1035 §2.3.4), and by
+    // 255-octet total cap (RFC 5321 §4.5.3.1.2)
+    return is_hostname(domain);
+  }
+}
+
+auto is_email(const std::string_view value) -> bool {
+  return is_mailbox<false>(value); // ASCII-only grammar (RFC 5321 §4.1.2)
+}
+
+auto is_idn_email(const std::string_view value) -> bool {
+  return is_mailbox<true>(value); // Adds the RFC 6531 §3.3 UTF-8 extensions
+}
 
 } // namespace sourcemeta::core
diff --git a/src/core/email/helpers.h b/src/core/email/helpers.h
new file mode 100644
index 0000000000..5cc9b822bf
--- /dev/null
+++ b/src/core/email/helpers.h
@@ -0,0 +1,200 @@
+#ifndef SOURCEMETA_CORE_EMAIL_HELPERS_H_
+#define SOURCEMETA_CORE_EMAIL_HELPERS_H_
+
+#include <sourcemeta/core/ip.h>
+#include <sourcemeta/core/unicode.h>
+
+#include <string_view> // std::string_view
+
+namespace {
+
+// RFC 5321 §4.1.2: atext = ALPHA / DIGIT, or one of the printable specials
+// ! # $ % & ' * + - / = ? ^ _ ` { | } ~
+inline constexpr auto is_atext(const char character) -> bool {
+  const bool alphanumeric{(character >= '0' && character <= '9') ||
+                          (character >= 'a' && character <= 'z') ||
+                          (character >= 'A' && character <= 'Z')};
+  switch (character) {
+    case '~':
+    case '}':
+    case '|':
+    case '{':
+    case '`':
+    case '_':
+    case '^':
+    case '?':
+    case '=':
+    case '/':
+    case '-':
+    case '+':
+    case '*':
+    case '\'':
+    case '&':
+    case '%':
+    case '$':
+    case '#':
+    case '!':
+      return true;
+    default:
+      return alphanumeric;
+  }
+}
+
+// RFC 5321 §4.1.2: qtextSMTP = %d32-33 / %d35-91 / %d93-126
+inline constexpr auto is_qtext_smtp(const unsigned char character) -> bool {
+  // Printable US-ASCII (%d32-126) except DQUOTE (%d34) and "\" (%d92)
+  const bool printable{character >= 32 && character <= 126};
+  return printable && character != 34 && character != 92;
+}
+
+// RFC 5321 §4.1.2: Let-dig = ALPHA / DIGIT
+inline constexpr auto is_let_dig(const char character) -> bool {
+  const bool digit{character >= '0' && character <= '9'};
+  const bool upper{character >= 'A' && character <= 'Z'};
+  return digit || upper || (character >= 'a' && character <= 'z');
+}
+
+// RFC 5321 §4.1.3: dcontent = %d33-90 / %d94-126 (visible, not "[", "\", "]")
+inline constexpr auto is_dcontent(const unsigned char character) -> bool {
+  const bool bracket_or_backslash{character >= 91 && character <= 93};
+  return character >= 33 && character <= 126 && !bracket_or_backslash;
+}
+
+// RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
+// RFC 5321 §4.1.3: Standardized-tag = Ldh-str
+inline constexpr auto is_ldh_str(const std::string_view value) -> bool {
+  // The final character must be a Let-dig, so a trailing hyphen is rejected
+  if (value.empty() || !is_let_dig(value.back())) {
+    return false;
+  }
+  // Every character before the last may be a Let-dig or a hyphen
+  for (const auto character : value.substr(0, value.size() - 1)) {
+    if (character != '-' && !is_let_dig(character)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// RFC 5321 §4.1.3: an IPv6-address-literal starts with the literal "IPv6:"
+// RFC 5234 §2.3: ABNF literal strings match case-insensitively by default
+inline constexpr auto matches_ipv6_tag(const std::string_view value) -> bool {
+  return value.size() >= 5 && value[3] == '6' && value[4] == ':' &&
+         (value[0] == 'i' || value[0] == 'I') &&
+         (value[1] == 'p' || value[1] == 'P') &&
+         (value[2] == 'V' || value[2] == 'v');
+}
+
+// RFC 5321 §4.1.3: General-address-literal = Standardized-tag ":" 1*dcontent
+inline constexpr auto is_general_address_literal(const std::string_view value)
+    -> bool {
+  // The tag ends at the first ":" because an Ldh-str cannot contain one
+  const auto separator{value.find(':')};
+  if (separator == std::string_view::npos ||
+      !is_ldh_str(value.substr(0, separator))) {
+    return false;
+  }
+  // 1*dcontent: at least one character must follow the separator
+  const auto payload{value.substr(separator + 1)};
+  if (payload.empty()) {
+    return false;
+  }
+  for (const auto character : payload) {
+    if (!is_dcontent(static_cast<unsigned char>(character))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// RFC 5321 §4.1.3: validate a complete address-literal, brackets included
+// ("[" payload "]"), as IPv6, IPv4, or General. Always ASCII; no IDNA applies
+inline auto is_address_literal(const std::string_view domain) -> bool {
+  // Fewer than two octets cannot hold both brackets (and back() needs one)
+  if (domain.size() < 2 || domain.front() != '[' || domain.back() != ']') {
+    return false;
+  }
+  // RFC 5321 §4.5.3.1.2: 255-octet cap on a domain "name or number"
+  if (domain.size() > 255) {
+    return false;
+  }
+  const auto inner{domain.substr(1, domain.size() - 2)};
+  // RFC 5321 §4.1.3: IPv6-address-literal = "IPv6:" IPv6-addr
+  if (matches_ipv6_tag(inner) && sourcemeta::core::is_ipv6(inner.substr(5))) {
+    return true;
+  }
+  // RFC 5234 §3.2: ABNF alternatives are unordered, so a failed IPv6 match
+  // falls through. RFC 5321 §4.1.3: IPv4-address-literal has no ":" while
+  // General-address-literal requires one
+  if (inner.find(':') == std::string_view::npos) {
+    return sourcemeta::core::is_ipv4(inner);
+  }
+  return is_general_address_literal(inner);
+}
+
+// TODO: Move to src/core/dns
+
+// RFC 6531 §3.3: sub-domain =/ U-label. Accepts dot-separated labels built
+// from Let-dig, "-", and valid multi-byte UTF-8 sequences. Rejects empty
+// labels, a leading or trailing "-" in a label, a trailing dot, labels over
+// 63 octets (RFC 1035 §2.3.4), and totals over 255 (RFC 5321 §4.5.3.1.2)
+inline auto is_idn_domain(const std::string_view value) -> bool {
+  if (value.empty() || value.size() > 255) {
+    return false;
+  }
+
+  std::string_view::size_type position{0};
+  while (position < value.size()) {
+    const auto label_start{position};
+    bool last_was_hyphen{false};
+    bool label_has_content{false};
+
+    while (position < value.size() && value[position] != '.') {
+      const auto character{value[position]};
+      if (character == '-') {
+        if (!label_has_content) {
+          return false;
+        }
+        last_was_hyphen = true;
+        position += 1;
+        label_has_content = true;
+        continue;
+      }
+
+      if (is_let_dig(character)) {
+        last_was_hyphen = false;
+        position += 1;
+        label_has_content = true;
+        continue;
+      }
+
+      const auto utf8_length{
+          sourcemeta::core::utf8_codepoint_length(value, position)};
+      if (utf8_length < 2) { // Invalid UTF-8 or a disallowed ASCII byte
+        return false;
+      }
+      last_was_hyphen = false;
+      position += utf8_length;
+      label_has_content = true;
+    }
+
+    const auto label_length{position - label_start}; // Measured in octets
+    if (label_length == 0 || label_length > 63 || last_was_hyphen) {
+      return false;
+    }
+
+    if (position < value.size()) { // Stopped on a "." separator
+      position += 1;
+      if (position == value.size()) {
+        // RFC 5321 §4.1.2: a Domain does not end with a dot
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+} // namespace
+
+#endif
diff --git a/src/core/email/include/sourcemeta/core/email.h b/src/core/email/include/sourcemeta/core/email.h
index d65b2847b1..947c1b939c 100644
--- a/src/core/email/include/sourcemeta/core/email.h
+++ b/src/core/email/include/sourcemeta/core/email.h
@@ -8,7 +8,7 @@
 #include <string_view> // std::string_view
 
 /// @defgroup email Email
-/// @brief E-mail address validation per RFC 5321.
+/// @brief E-mail address validation per RFC 5321 and RFC 6531.
 ///
 /// This functionality is included as follows:
 ///
@@ -36,6 +36,26 @@ namespace sourcemeta::core {
 SOURCEMETA_CORE_EMAIL_EXPORT
 auto is_email(const std::string_view value) -> bool;
 
+/// @ingroup email
+/// Check whether the given string is a valid internationalized `Mailbox`
+/// per RFC 6531 Section 3.3 (extended Mailbox address syntax). Beyond the
+/// ASCII grammar accepted by `is_email`, the local-part atoms, quoted
+/// content, and domain labels may also contain valid UTF-8 non-ASCII byte
+/// sequences (RFC 6532 Section 3.1). For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/email.h>
+///
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::is_idn_email(
+///     "\xec\x8b\xa4\xeb\xa1\x80@\xec\x8b\xa4\xeb\xa1\x80.\xed\x85\x8c\xec\x8a\xa4\xed\x8a\xb8"));
+/// assert(sourcemeta::core::is_idn_email("joe.bloggs@example.com"));
+/// assert(!sourcemeta::core::is_idn_email("2962"));
+/// ```
+SOURCEMETA_CORE_EMAIL_EXPORT
+auto is_idn_email(const std::string_view value) -> bool;
+
 } // namespace sourcemeta::core
 
 #endif
diff --git a/src/core/unicode/include/sourcemeta/core/unicode.h b/src/core/unicode/include/sourcemeta/core/unicode.h
index c8845a08eb..687734fd67 100644
--- a/src/core/unicode/include/sourcemeta/core/unicode.h
+++ b/src/core/unicode/include/sourcemeta/core/unicode.h
@@ -5,6 +5,8 @@
 #include <sourcemeta/core/unicode_export.h>
 #endif
 
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint8_t
 #include <istream>     // std::istream
 #include <optional>    // std::optional
 #include <ostream>     // std::ostream
@@ -98,6 +100,196 @@ SOURCEMETA_CORE_UNICODE_EXPORT
 auto utf8_to_utf32(const std::string_view input)
     -> std::optional<std::u32string>;
 
+/// @ingroup unicode
+/// Determine the byte length encoded by a UTF-8 lead byte. Returns 1 for an
+/// ASCII byte (%x00-7F), 2 for a 2-byte lead (%xC2-DF), 3 for a 3-byte lead
+/// (%xE0-EF), 4 for a 4-byte lead (%xF0-F4), or 0 for any other byte
+/// (continuation byte, overlong %xC0/%xC1, or out-of-range %xF5-FF).
+/// For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::utf8_lead_byte_size(0x41) == 1);
+/// assert(sourcemeta::core::utf8_lead_byte_size(0xCE) == 2);
+/// assert(sourcemeta::core::utf8_lead_byte_size(0xE4) == 3);
+/// assert(sourcemeta::core::utf8_lead_byte_size(0xF0) == 4);
+/// assert(sourcemeta::core::utf8_lead_byte_size(0x80) == 0);
+/// ```
+inline constexpr auto utf8_lead_byte_size(const unsigned char byte)
+    -> std::uint8_t {
+  if (byte <= 0x7F) {
+    return 1;
+  }
+  if (byte < 0xC2 || byte > 0xF4) {
+    return 0;
+  }
+  if (byte < 0xE0) {
+    return 2;
+  }
+  if (byte < 0xF0) {
+    return 3;
+  }
+  return 4;
+}
+
+/// @ingroup unicode
+/// Check whether the given byte is a UTF-8 continuation byte (%x80-BF per
+/// RFC 6532 Section 3.1). For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::is_utf8_continuation(0x80));
+/// assert(sourcemeta::core::is_utf8_continuation(0xBF));
+/// assert(!sourcemeta::core::is_utf8_continuation(0x7F));
+/// assert(!sourcemeta::core::is_utf8_continuation(0xC0));
+/// ```
+inline constexpr auto is_utf8_continuation(const unsigned char byte) -> bool {
+  return (byte & 0xC0) == 0x80;
+}
+
+/// @ingroup unicode
+/// Check whether the given codepoint is in the UTF-16 surrogate range
+/// (U+D800 to U+DFFF), which is forbidden in scalar Unicode text.
+/// For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::is_surrogate(0xD800));
+/// assert(sourcemeta::core::is_surrogate(0xDFFF));
+/// assert(!sourcemeta::core::is_surrogate(0xD7FF));
+/// assert(!sourcemeta::core::is_surrogate(0xE000));
+/// ```
+inline constexpr auto is_surrogate(const char32_t codepoint) -> bool {
+  return !(codepoint < 0xD800 || codepoint > 0xDFFF);
+}
+
+/// @ingroup unicode
+/// Check whether the given value is a valid Unicode codepoint: in the range
+/// U+0000 to U+10FFFF, excluding the UTF-16 surrogate range (U+D800 to
+/// U+DFFF). For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::is_valid_codepoint(0x0000));
+/// assert(sourcemeta::core::is_valid_codepoint(0x10FFFF));
+/// assert(!sourcemeta::core::is_valid_codepoint(0xD800));
+/// assert(!sourcemeta::core::is_valid_codepoint(0x110000));
+/// ```
+inline constexpr auto is_valid_codepoint(const char32_t codepoint) -> bool {
+  return !(codepoint > 0x10FFFF || is_surrogate(codepoint));
+}
+
+/// @ingroup unicode
+/// Determine the number of UTF-8 bytes that a codepoint encodes to per
+/// RFC 3629: 1 byte for U+0000-U+007F, 2 bytes for U+0080-U+07FF, 3 bytes
+/// for U+0800-U+FFFF, and 4 bytes for U+10000 and above. The caller is
+/// responsible for ensuring the codepoint is in range. For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::utf8_codepoint_byte_count(0x0041) == 1);
+/// assert(sourcemeta::core::utf8_codepoint_byte_count(0x00E9) == 2);
+/// assert(sourcemeta::core::utf8_codepoint_byte_count(0x4E2D) == 3);
+/// assert(sourcemeta::core::utf8_codepoint_byte_count(0x1F600) == 4);
+/// ```
+inline constexpr auto utf8_codepoint_byte_count(const char32_t codepoint)
+    -> std::uint8_t {
+  if (codepoint >= 0x10000) {
+    return 4;
+  }
+  if (codepoint >= 0x800) {
+    return 3;
+  }
+  if (codepoint >= 0x80) {
+    return 2;
+  }
+  return 1;
+}
+
+/// @ingroup unicode
+/// Determine the byte length of the valid UTF-8 codepoint starting at the
+/// given position within the input. Returns 1 for an ASCII byte, 2/3/4 for a
+/// valid multi-byte UTF-8 sequence (RFC 6532 Section 3.1, excluding overlong
+/// encodings, surrogates, and code points above U+10FFFF), or 0 if the bytes
+/// at that position do not start a valid UTF-8 codepoint. For example:
+///
+/// ```cpp
+/// #include <sourcemeta/core/unicode.h>
+/// #include <cassert>
+///
+/// assert(sourcemeta::core::utf8_codepoint_length("A", 0) == 1);
+/// assert(sourcemeta::core::utf8_codepoint_length("\xce\xb1", 0) == 2);
+/// assert(sourcemeta::core::utf8_codepoint_length("\xe4\xb8\xad", 0) == 3);
+/// assert(sourcemeta::core::utf8_codepoint_length("\xf0\x9f\x98\x80", 0) == 4);
+/// assert(sourcemeta::core::utf8_codepoint_length("\xed\xa0\x80", 0) == 0);
+/// ```
+inline constexpr auto
+utf8_codepoint_length(const std::string_view input,
+                      const std::string_view::size_type position)
+    -> std::size_t {
+  if (position >= input.size()) {
+    return 0;
+  }
+
+  const auto lead{static_cast<unsigned char>(input[position])};
+  const std::size_t length{utf8_lead_byte_size(lead)};
+  // Reject bytes that cannot lead a sequence and sequences cut short
+  if (length == 0 || position + length > input.size()) {
+    return 0;
+  }
+  if (length == 1) {
+    return 1;
+  }
+
+  // RFC 6532 §3.1 narrows the byte after certain leads to rule out overlong
+  // encodings (%xE0, %xF0), surrogates (%xED), and code points above
+  // U+10FFFF (%xF4). Every other lead takes the full continuation range
+  unsigned char lowest{0x80};
+  unsigned char highest{0xBF};
+  switch (lead) {
+    case 0xE0:
+      lowest = 0xA0;
+      break;
+    case 0xED:
+      highest = 0x9F;
+      break;
+    case 0xF0:
+      lowest = 0x90;
+      break;
+    case 0xF4:
+      highest = 0x8F;
+      break;
+    default:
+      break;
+  }
+
+  const auto second{static_cast<unsigned char>(input[position + 1])};
+  if (second < lowest || second > highest) {
+    return 0;
+  }
+
+  // Any bytes after the second are only required to fall within the
+  // continuation byte range
+  for (std::size_t offset{2}; offset < length; ++offset) {
+    const auto byte{static_cast<unsigned char>(input[position + offset])};
+    if (!is_utf8_continuation(byte)) {
+      return 0;
+    }
+  }
+
+  return length;
+}
+
 } // namespace sourcemeta::core
 
 #endif
diff --git a/src/core/unicode/unicode.cc b/src/core/unicode/unicode.cc
index fa43ef1002..f85267c525 100644
--- a/src/core/unicode/unicode.cc
+++ b/src/core/unicode/unicode.cc
@@ -7,8 +7,7 @@
 namespace sourcemeta::core {
 
 auto codepoint_to_utf8(const char32_t codepoint, std::ostream &output) -> void {
-  assert(codepoint <= 0x10FFFF);
-  assert(codepoint < 0xD800 || codepoint > 0xDFFF);
+  assert(is_valid_codepoint(codepoint));
   if (codepoint < 0x80) {
     output.put(static_cast<char>(codepoint));
   } else if (codepoint < 0x800) {
@@ -27,8 +26,7 @@ auto codepoint_to_utf8(const char32_t codepoint, std::ostream &output) -> void {
 }
 
 auto codepoint_to_utf8(const char32_t codepoint, std::string &output) -> void {
-  assert(codepoint <= 0x10FFFF);
-  assert(codepoint < 0xD800 || codepoint > 0xDFFF);
+  assert(is_valid_codepoint(codepoint));
   if (codepoint < 0x80) {
     output.push_back(static_cast<char>(codepoint));
   } else if (codepoint < 0x800) {
@@ -57,41 +55,39 @@ auto utf8_to_utf32(std::istream &input) -> std::optional<std::u32string> {
   std::uint8_t byte{0};
 
   while (input.read(reinterpret_cast<char *>(&byte), 1)) {
-    char32_t code_point{0};
-    std::uint8_t continuation_count{0};
-    char32_t minimum{0};
-
-    if (byte < 0x80) {
+    const auto size{utf8_lead_byte_size(byte)};
+    if (size == 0) {
+      return std::nullopt;
+    }
+    if (size == 1) {
       result.push_back(byte);
       continue;
-    } else if ((byte & 0xE0) == 0xC0) {
+    }
+
+    char32_t code_point{0};
+    char32_t minimum{0};
+    if (size == 2) {
       code_point = byte & 0x1F;
-      continuation_count = 1;
       minimum = 0x80;
-    } else if ((byte & 0xF0) == 0xE0) {
+    } else if (size == 3) {
       code_point = byte & 0x0F;
-      continuation_count = 2;
       minimum = 0x800;
-    } else if ((byte & 0xF8) == 0xF0) {
+    } else {
       code_point = byte & 0x07;
-      continuation_count = 3;
       minimum = 0x10000;
-    } else {
-      return std::nullopt;
     }
 
-    for (std::uint8_t index = 0; index < continuation_count; ++index) {
+    for (std::uint8_t index{1}; index < size; ++index) {
       std::uint8_t continuation{0};
       if (!input.read(reinterpret_cast<char *>(&continuation), 1) ||
-          (continuation & 0xC0) != 0x80) {
+          !is_utf8_continuation(continuation)) {
         return std::nullopt;
       }
 
       code_point = (code_point << 6) | (continuation & 0x3F);
     }
 
-    if (code_point < minimum || code_point > 0x10FFFF ||
-        (code_point >= 0xD800 && code_point <= 0xDFFF)) {
+    if (code_point < minimum || !is_valid_codepoint(code_point)) {
       return std::nullopt;
     }
 
diff --git a/test/email/CMakeLists.txt b/test/email/CMakeLists.txt
index 52b5ebf0a8..b78bcac2d8 100644
--- a/test/email/CMakeLists.txt
+++ b/test/email/CMakeLists.txt
@@ -1,5 +1,5 @@
 sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME email
-  SOURCES email_test.cc)
+  SOURCES email_test.cc idn_email_test.cc)
 
 target_link_libraries(sourcemeta_core_email_unit
   PRIVATE sourcemeta::core::email)
diff --git a/test/email/email_test.cc b/test/email/email_test.cc
index 161e9593b9..54c6b6fb87 100644
--- a/test/email/email_test.cc
+++ b/test/email/email_test.cc
@@ -7,321 +7,385 @@
 // RFC 5321 §4.1.2: minimal Dot-string Atom + minimal Domain sub-domain
 TEST(Email, valid_dot_string_single_letter) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b"));
 }
 
 // RFC 5321 §4.1.2: atext includes DIGIT
 TEST(Email, valid_dot_string_single_digit) {
   EXPECT_TRUE(sourcemeta::core::is_email("1@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("1@b"));
 }
 
 // RFC 5321 §4.1.2: Atom = 1*atext
 TEST(Email, valid_dot_string_multi_letter_atom) {
   EXPECT_TRUE(sourcemeta::core::is_email("abc@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("abc@b"));
 }
 
 // RFC 5321 §4.1.2: atext mixes ALPHA and DIGIT
 TEST(Email, valid_dot_string_alpha_digit_mix) {
   EXPECT_TRUE(sourcemeta::core::is_email("a1b2@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a1b2@b"));
 }
 
 // RFC 5321 §4.1.2: Dot-string = Atom *("." Atom) with two atoms
 TEST(Email, valid_dot_string_two_atoms) {
   EXPECT_TRUE(sourcemeta::core::is_email("a.b@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a.b@c"));
 }
 
 // RFC 5321 §4.1.2: Dot-string with many atoms
 TEST(Email, valid_dot_string_many_atoms) {
   EXPECT_TRUE(sourcemeta::core::is_email("a.b.c.d.e@f"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a.b.c.d.e@f"));
 }
 
 // RFC 5321 §4.1.2: ALPHA covers A-Z
 TEST(Email, valid_dot_string_uppercase_atom) {
   EXPECT_TRUE(sourcemeta::core::is_email("ABC@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("ABC@b"));
 }
 
 // RFC 5321 §4.1.2: ALPHA covers both cases in one atom
 TEST(Email, valid_dot_string_mixed_case_atom) {
   EXPECT_TRUE(sourcemeta::core::is_email("aBc@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("aBc@b"));
 }
 
 // RFC 5321 §4.1.2: "!" is atext
 TEST(Email, valid_atext_bang) {
   EXPECT_TRUE(sourcemeta::core::is_email("!@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("!@b"));
 }
 
 // RFC 5321 §4.1.2: "#" is atext
 TEST(Email, valid_atext_hash) {
   EXPECT_TRUE(sourcemeta::core::is_email("#@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("#@b"));
 }
 
 // RFC 5321 §4.1.2: "$" is atext
 TEST(Email, valid_atext_dollar) {
   EXPECT_TRUE(sourcemeta::core::is_email("$@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("$@b"));
 }
 
 // RFC 5321 §4.1.2: "%" is atext
 TEST(Email, valid_atext_percent) {
   EXPECT_TRUE(sourcemeta::core::is_email("%@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("%@b"));
 }
 
 // RFC 5321 §4.1.2: "&" is atext
 TEST(Email, valid_atext_ampersand) {
   EXPECT_TRUE(sourcemeta::core::is_email("&@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("&@b"));
 }
 
 // RFC 5321 §4.1.2: "'" is atext
 TEST(Email, valid_atext_apostrophe) {
   EXPECT_TRUE(sourcemeta::core::is_email("'@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("'@b"));
 }
 
 // RFC 5321 §4.1.2: "*" is atext
 TEST(Email, valid_atext_asterisk) {
   EXPECT_TRUE(sourcemeta::core::is_email("*@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("*@b"));
 }
 
 // RFC 5321 §4.1.2: "+" is atext
 TEST(Email, valid_atext_plus) {
   EXPECT_TRUE(sourcemeta::core::is_email("+@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("+@b"));
 }
 
 // RFC 5321 §4.1.2: "-" is atext
 TEST(Email, valid_atext_hyphen) {
   EXPECT_TRUE(sourcemeta::core::is_email("-@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("-@b"));
 }
 
 // RFC 5321 §4.1.2: "/" is atext
 TEST(Email, valid_atext_slash) {
   EXPECT_TRUE(sourcemeta::core::is_email("/@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("/@b"));
 }
 
 // RFC 5321 §4.1.2: "=" is atext
 TEST(Email, valid_atext_equals) {
   EXPECT_TRUE(sourcemeta::core::is_email("=@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("=@b"));
 }
 
 // RFC 5321 §4.1.2: "?" is atext
 TEST(Email, valid_atext_question) {
   EXPECT_TRUE(sourcemeta::core::is_email("?@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("?@b"));
 }
 
 // RFC 5321 §4.1.2: "^" is atext
 TEST(Email, valid_atext_caret) {
   EXPECT_TRUE(sourcemeta::core::is_email("^@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("^@b"));
 }
 
 // RFC 5321 §4.1.2: "_" is atext
 TEST(Email, valid_atext_underscore) {
   EXPECT_TRUE(sourcemeta::core::is_email("_@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("_@b"));
 }
 
 // RFC 5321 §4.1.2: "`" is atext
 TEST(Email, valid_atext_backtick) {
   EXPECT_TRUE(sourcemeta::core::is_email("`@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("`@b"));
 }
 
 // RFC 5321 §4.1.2: "{" is atext
 TEST(Email, valid_atext_lbrace) {
   EXPECT_TRUE(sourcemeta::core::is_email("{@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("{@b"));
 }
 
 // RFC 5321 §4.1.2: "|" is atext
 TEST(Email, valid_atext_pipe) {
   EXPECT_TRUE(sourcemeta::core::is_email("|@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("|@b"));
 }
 
 // RFC 5321 §4.1.2: "}" is atext
 TEST(Email, valid_atext_rbrace) {
   EXPECT_TRUE(sourcemeta::core::is_email("}@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("}@b"));
 }
 
 // RFC 5321 §4.1.2: "~" is atext
 TEST(Email, valid_atext_tilde) {
   EXPECT_TRUE(sourcemeta::core::is_email("~@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("~@b"));
 }
 
 // RFC 5321 §4.1.2: a single Atom may include every atext special at once
 TEST(Email, valid_dot_string_all_specials_one_atom) {
   EXPECT_TRUE(sourcemeta::core::is_email("!#$%&'*+-/=?^_`{|}~@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("!#$%&'*+-/=?^_`{|}~@b"));
 }
 
 // RFC 5321 §4.5.3.1.1: Local-part octet limit is 64
 TEST(Email, valid_local_part_length_64) {
   EXPECT_TRUE(sourcemeta::core::is_email(std::string(64, 'a') + "@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(std::string(64, 'a') + "@b"));
 }
 
 // RFC 5321 §4.1.2: Atom = 1*atext, single byte is the minimum
 TEST(Email, valid_local_part_length_1) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b"));
 }
 
 // RFC 5321 §4.1.2: Mailbox requires both a Local-part and a Domain
 TEST(Email, invalid_no_at_sign) {
   EXPECT_FALSE(sourcemeta::core::is_email("plain"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("plain"));
 }
 
 // RFC 5321 §4.1.2: Local-part = Dot-string / Quoted-string, both non-empty
 TEST(Email, invalid_only_at_sign) {
   EXPECT_FALSE(sourcemeta::core::is_email("@"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("@"));
 }
 
 // RFC 5321 §4.1.2: Atom = 1*atext, empty Local-part is invalid
 TEST(Email, invalid_empty_local) {
   EXPECT_FALSE(sourcemeta::core::is_email("@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("@b"));
 }
 
 // RFC 5321 §4.1.2: Domain = sub-domain *("." sub-domain), empty is invalid
 TEST(Email, invalid_empty_domain) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@"));
 }
 
 // RFC 5321 §4.1.2: only one "@" allowed outside a Quoted-string
 TEST(Email, invalid_two_at_signs_unquoted) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b@c"));
 }
 
 // RFC 5321 §4.1.2: three unquoted "@" signs
 TEST(Email, invalid_three_at_signs_unquoted) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b@c@d"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b@c@d"));
 }
 
 // RFC 5321 §4.1.2: Dot-string requires a leading Atom
 TEST(Email, invalid_local_leading_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email(".a@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(".a@b"));
 }
 
 // RFC 5321 §4.1.2: Dot-string requires a trailing Atom
 TEST(Email, invalid_local_trailing_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a.@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a.@b"));
 }
 
 // RFC 5321 §4.1.2: Atom = 1*atext, double dot yields an empty Atom
 TEST(Email, invalid_local_double_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a..b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a..b@c"));
 }
 
 // RFC 5321 §4.1.2: lone "." has no atext on either side
 TEST(Email, invalid_local_only_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email(".@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(".@b"));
 }
 
 // RFC 5321 §4.1.2: ".." has no atext between the dots
 TEST(Email, invalid_local_only_dots) {
   EXPECT_FALSE(sourcemeta::core::is_email("..@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("..@b"));
 }
 
 // RFC 5321 §4.5.3.1.1: Local-part may not exceed 64 octets
 TEST(Email, invalid_local_part_length_65) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string(65, 'a') + "@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(std::string(65, 'a') + "@b"));
 }
 
 // RFC 5321 §4.1.2: "(" is not in atext
 TEST(Email, invalid_atext_lparen) {
   EXPECT_FALSE(sourcemeta::core::is_email("(@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("(@b"));
 }
 
 // RFC 5321 §4.1.2: ")" is not in atext
 TEST(Email, invalid_atext_rparen) {
   EXPECT_FALSE(sourcemeta::core::is_email(")@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(")@b"));
 }
 
 // RFC 5321 §4.1.2: "," is not in atext
 TEST(Email, invalid_atext_comma) {
   EXPECT_FALSE(sourcemeta::core::is_email(",@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(",@b"));
 }
 
 // RFC 5321 §4.1.2: ":" is not in atext
 TEST(Email, invalid_atext_colon) {
   EXPECT_FALSE(sourcemeta::core::is_email(":@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(":@b"));
 }
 
 // RFC 5321 §4.1.2: ";" is not in atext
 TEST(Email, invalid_atext_semicolon) {
   EXPECT_FALSE(sourcemeta::core::is_email(";@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(";@b"));
 }
 
 // RFC 5321 §4.1.2: "<" is not in atext
 TEST(Email, invalid_atext_lt) {
   EXPECT_FALSE(sourcemeta::core::is_email("<@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("<@b"));
 }
 
 // RFC 5321 §4.1.2: ">" is not in atext
 TEST(Email, invalid_atext_gt) {
   EXPECT_FALSE(sourcemeta::core::is_email(">@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(">@b"));
 }
 
 // RFC 5321 §4.1.2: "[" is not in atext
 TEST(Email, invalid_atext_lbracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("[@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("[@b"));
 }
 
 // RFC 5321 §4.1.2: "\" is not in atext (only valid inside quoted-pair)
 TEST(Email, invalid_atext_backslash) {
   EXPECT_FALSE(sourcemeta::core::is_email("\\@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\\@b"));
 }
 
 // RFC 5321 §4.1.2: "]" is not in atext
 TEST(Email, invalid_atext_rbracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("]@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("]@b"));
 }
 
 // RFC 5321 §4.1.2: SP is not in atext
 TEST(Email, invalid_atext_space_unquoted) {
   EXPECT_FALSE(sourcemeta::core::is_email(" @b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(" @b"));
 }
 
 // RFC 5321 §4.1.2: atext is ASCII; bytes >= 0x80 are excluded
 TEST(Email, invalid_local_high_bit_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\x80@b"));
 }
 
 // RFC 5321 §4.1.2: NUL is not in atext
 TEST(Email, invalid_local_nul_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string_view{"a\x00@b", 4}));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(std::string_view{"a\x00@b", 4}));
 }
 
 // RFC 5321 §4.1.2: control characters are not in atext
 TEST(Email, invalid_local_control_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\x01@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\x01@b"));
 }
 
 // RFC 5321 §4.1.2: DEL is not in atext
 TEST(Email, invalid_local_del_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\x7f@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\x7f@b"));
 }
 
 // RFC 5321 §4.1.2 Domain: sub-domain *("." sub-domain) with one label
 TEST(Email, valid_domain_single_label) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b"));
 }
 
 // RFC 5321 §4.1.2 Domain: two labels separated by "."
 TEST(Email, valid_domain_two_labels) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b.c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b.c"));
 }
 
 // RFC 5321 §4.1.2 Domain: many labels
 TEST(Email, valid_domain_three_labels) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b.c.d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b.c.d"));
 }
 
 // RFC 5321 §4.1.2 Domain: sub-domain = Let-dig [Ldh-str], digit is Let-dig
 TEST(Email, valid_domain_label_starts_with_digit) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@1b.c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@1b.c"));
 }
 
 // RFC 5321 §4.1.2 Domain: grammar allows numeric-only labels
 TEST(Email, valid_domain_numeric_tld) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@example.123"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@example.123"));
 }
 
 // RFC 5321 §4.1.2 Domain: case is preserved but accepted
 TEST(Email, valid_domain_uppercase) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@EXAMPLE.COM"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@EXAMPLE.COM"));
 }
 
 // RFC 1035 §2.3.4 via §4.1.2 Domain: 63-byte label is the cap
 TEST(Email, valid_domain_label_63) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@" + std::string(63, 'b')));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@" + std::string(63, 'b')));
 }
 
 // RFC 5321 §4.5.3.1.2 Domain: 255-byte domain is the cap
@@ -329,41 +393,51 @@ TEST(Email, valid_domain_total_255) {
   EXPECT_TRUE(sourcemeta::core::is_email(
       "a@" + std::string(63, 'b') + "." + std::string(63, 'c') + "." +
       std::string(63, 'd') + "." + std::string(63, 'e')));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(
+      "a@" + std::string(63, 'b') + "." + std::string(63, 'c') + "." +
+      std::string(63, 'd') + "." + std::string(63, 'e')));
 }
 
 // RFC 5321 §4.1.2 Domain: Ldh-str excludes "_"
 TEST(Email, invalid_domain_underscore) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@host_name"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@host_name"));
 }
 
 // RFC 5321 §4.1.2 Domain: sub-domain must start with Let-dig
 TEST(Email, invalid_domain_leading_hyphen) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@-host"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@-host"));
 }
 
 // RFC 5321 §4.1.2 Domain: Ldh-str must end with Let-dig
 TEST(Email, invalid_domain_trailing_hyphen) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@host-"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@host-"));
 }
 
 // RFC 5321 §4.1.2 Domain: no trailing "."
 TEST(Email, invalid_domain_trailing_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@host."));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@host."));
 }
 
 // RFC 5321 §4.1.2 Domain: "." between labels requires a sub-domain on each side
 TEST(Email, invalid_domain_double_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@x..y"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@x..y"));
 }
 
 // RFC 5321 §4.1.2 Domain: no leading "."
 TEST(Email, invalid_domain_leading_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@.host"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@.host"));
 }
 
 // RFC 1035 §2.3.4 via §4.1.2 Domain: 64-byte label exceeds cap
 TEST(Email, invalid_domain_label_64) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@" + std::string(64, 'b')));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@" + std::string(64, 'b')));
 }
 
 // RFC 5321 §4.5.3.1.2 Domain: 256-byte domain exceeds cap (63 + "." + 63 +
@@ -372,195 +446,237 @@ TEST(Email, invalid_domain_total_256) {
   EXPECT_FALSE(sourcemeta::core::is_email(
       "a@" + std::string(63, 'b') + "." + std::string(63, 'c') + "." +
       std::string(63, 'd') + "." + std::string(62, 'e') + ".f"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(
+      "a@" + std::string(63, 'b') + "." + std::string(63, 'c') + "." +
+      std::string(63, 'd') + "." + std::string(62, 'e') + ".f"));
 }
 
 // RFC 5321 §4.1.2 Domain: ASCII only, bytes >= 0x80 excluded
 TEST(Email, invalid_domain_high_bit_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@hos\x80t"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@hos\x80t"));
 }
 
 // RFC 5321 §4.1.2 Domain: SP is not in Ldh-str
 TEST(Email, invalid_domain_space) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@host name"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@host name"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string = DQUOTE *QcontentSMTP DQUOTE permits zero
 // content bytes
 TEST(Email, valid_quoted_empty) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP includes %d32 SP
 TEST(Email, valid_quoted_single_space) {
   EXPECT_TRUE(sourcemeta::core::is_email("\" \"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\" \"@b"));
 }
 
 // RFC 5321 §4.1.2: any atext byte is also in qtextSMTP
 TEST(Email, valid_quoted_single_atext) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP %d35-91 includes "@"
 TEST(Email, valid_quoted_at_inside) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a@b\"@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a@b\"@c"));
 }
 
 // RFC 5321 §4.1.2: Dot-string rules do not apply inside Quoted-string
 TEST(Email, valid_quoted_dot_inside) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a.b\"@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a.b\"@c"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string permits a leading "." inside
 TEST(Email, valid_quoted_dot_at_start) {
   EXPECT_TRUE(sourcemeta::core::is_email("\".a\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\".a\"@b"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string permits a trailing "." inside
 TEST(Email, valid_quoted_dot_at_end) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a.\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a.\"@b"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string permits consecutive "." inside
 TEST(Email, valid_quoted_double_dot_inside) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a..b\"@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a..b\"@c"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP starts at %d32 (SP)
 TEST(Email, valid_quoted_qtext_d32_space) {
   EXPECT_TRUE(sourcemeta::core::is_email("\" \"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\" \"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP includes %d33 "!"
 TEST(Email, valid_quoted_qtext_d33_bang) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"!\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"!\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP resumes at %d35 "#" after skipping DQUOTE
 TEST(Email, valid_quoted_qtext_d35_hash) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"#\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"#\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP %d35-91 ends at "[" (%d91)
 TEST(Email, valid_quoted_qtext_d91_lbracket) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"[\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"[\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP resumes at "]" (%d93) after skipping "\\"
 TEST(Email, valid_quoted_qtext_d93_rbracket) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"]\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"]\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP ends at "~" (%d126)
 TEST(Email, valid_quoted_qtext_d126_tilde) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"~\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"~\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP escapes DQUOTE
 TEST(Email, valid_quoted_pair_dquote) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\\"\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\\"\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP escapes backslash itself
 TEST(Email, valid_quoted_pair_backslash) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\\\\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\\\\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body %d32 is SP
 TEST(Email, valid_quoted_pair_space) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\ \"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\ \"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body covers any ASCII graphic
 TEST(Email, valid_quoted_pair_letter) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\a\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\a\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body %d126 is "~"
 TEST(Email, valid_quoted_pair_tilde) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\~\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\~\"@b"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string accepts qtextSMTP and quoted-pairSMTP mixed
 TEST(Email, valid_quoted_mixed_qtext_and_pair) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a\\\"b\\\\c\"@d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a\\\"b\\\\c\"@d"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP includes ',' and ';' (both in %d35-91)
 TEST(Email, valid_quoted_with_comma_semicolon) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a,b;c\"@d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a,b;c\"@d"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP includes '(' and ')' (both in %d35-91)
 TEST(Email, valid_quoted_with_parens) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"(comment)\"@d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"(comment)\"@d"));
 }
 
 // RFC 5321 §4.5.3.1.1: 62 qtext bytes plus two DQUOTEs equals 64 octets
 TEST(Email, valid_quoted_local_length_64) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"" + std::string(62, 'a') + "\"@b"));
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("\"" + std::string(62, 'a') + "\"@b"));
 }
 
 // RFC 5321 §4.1.2: Quoted-string must terminate with DQUOTE
 TEST(Email, invalid_quoted_unterminated) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"foo@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"foo@b"));
 }
 
 // RFC 5321 §4.1.2: extra content after closing DQUOTE is not Mailbox grammar
 TEST(Email, invalid_quoted_followed_by_atext) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\"b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\"b@c"));
 }
 
 // RFC 5321 §4.1.2: Mailbox grammar does not permit mixing Quoted-string and
 // Dot-string in a Local-part
 TEST(Email, invalid_quoted_followed_by_dot_atext) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\".b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\".b@c"));
 }
 
 // RFC 5321 §4.1.2: a Quoted-string cannot be preceded by atext
 TEST(Email, invalid_quoted_preceded_by_atext) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\"b\"@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\"b\"@c"));
 }
 
 // RFC 5321 §4.1.2: bare DQUOTE inside a Quoted-string closes it; the trailing
 // bytes break the Mailbox grammar
 TEST(Email, invalid_quoted_bare_dquote_inside) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\"b\"@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\"b\"@c"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP consumes the byte after "\\"; if that byte
 // is DQUOTE the Quoted-string is left unterminated
 TEST(Email, invalid_quoted_dangling_backslash) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\\\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\\\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP excludes controls (%d0-31)
 TEST(Email, invalid_quoted_qtext_control_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\x01\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\x01\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP excludes NUL
 TEST(Email, invalid_quoted_qtext_nul) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string_view{"\"\x00\"@b", 5}));
+  EXPECT_FALSE(
+      sourcemeta::core::is_idn_email(std::string_view{"\"\x00\"@b", 5}));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP is ASCII, bytes >= 0x80 are excluded
 TEST(Email, invalid_quoted_qtext_high_bit) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\x80\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\x80\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body is %d32-126, controls are excluded
 TEST(Email, invalid_quoted_pair_control_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\\\x01\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\\\x01\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body ends at %d126, DEL is excluded
 TEST(Email, invalid_quoted_pair_del_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\\\x7f\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\\\x7f\"@b"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body is ASCII, bytes >= 0x80 are excluded
 TEST(Email, invalid_quoted_pair_high_bit_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\\\x80\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\\\x80\"@b"));
 }
 
 // RFC 5321 §4.5.3.1.1: 63 qtext bytes plus two DQUOTEs equals 65 octets, one
@@ -568,160 +684,193 @@ TEST(Email, invalid_quoted_pair_high_bit_byte) {
 TEST(Email, invalid_quoted_local_length_65) {
   EXPECT_FALSE(
       sourcemeta::core::is_email("\"" + std::string(63, 'a') + "\"@b"));
+  EXPECT_FALSE(
+      sourcemeta::core::is_idn_email("\"" + std::string(63, 'a') + "\"@b"));
 }
 
 // RFC 5321 §4.1.2: a Quoted-string may contain an unquoted "@" but the outer
 // boundary "@" is still required after the closing DQUOTE
 TEST(Email, valid_two_at_signs_one_quoted) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"a@b\"@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a@b\"@c"));
 }
 
 // RFC 5321 §4.1.2: two embedded "@" inside a Quoted-string are still qtextSMTP
 TEST(Email, valid_two_at_signs_quoted_with_atext) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"x@y@z\"@d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"x@y@z\"@d"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal Snum 3("." Snum) covers 0.0.0.0
 TEST(Email, valid_ipv4_literal_zeros) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[0.0.0.0]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[0.0.0.0]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal Snum max is 255
 TEST(Email, valid_ipv4_literal_max) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[255.255.255.255]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[255.255.255.255]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal typical RFC 1918 address
 TEST(Email, valid_ipv4_literal_typical) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[192.168.1.1]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal loopback
 TEST(Email, valid_ipv4_literal_loopback) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[127.0.0.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[127.0.0.1]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal one-digit Snum
 TEST(Email, valid_ipv4_literal_single_digit) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[1.2.3.4]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal two-digit Snum
 TEST(Email, valid_ipv4_literal_two_digit) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[10.20.30.40]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[10.20.30.40]"));
 }
 
 // RFC 5321 §4.1.2: Domain accepts numeric-only labels without brackets
 TEST(Email, valid_domain_dotted_numeric) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@1.2.3.4"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@1.2.3.4"));
 }
 
 // RFC 5321 §4.1.3: Snum value range 0-255, 256 is out of range
 TEST(Email, invalid_ipv4_octet_256) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[256.0.0.0]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[256.0.0.0]"));
 }
 
 // RFC 5321 §4.1.3: Snum value range 0-255, 999 is out of range
 TEST(Email, invalid_ipv4_octet_999) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[999.0.0.1]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[999.0.0.1]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal requires exactly four Snum octets
 TEST(Email, invalid_ipv4_three_octets) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.3]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.3]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal rejects five Snum octets
 TEST(Email, invalid_ipv4_five_octets) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.3.4.5]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.3.4.5]"));
 }
 
 // RFC 5321 §4.1.3: Snum = 1*3DIGIT, leading zero in multi-digit Snum is
 // rejected by is_ipv4
 TEST(Email, invalid_ipv4_leading_zero) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[01.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[01.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal cannot end with a "."
 TEST(Email, invalid_ipv4_trailing_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.3.4.]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.3.4.]"));
 }
 
 // RFC 5321 §4.1.3: IPv4-address-literal cannot begin with a "."
 TEST(Email, invalid_ipv4_leading_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[.1.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[.1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: Snum = 1*3DIGIT, "-" is not a digit
 TEST(Email, invalid_ipv4_negative_octet) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[-1.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[-1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: Snum = 1*3DIGIT, alphabetic bytes are not digits
 TEST(Email, invalid_ipv4_alpha_octet) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.a.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.a.4]"));
 }
 
 // RFC 5321 §4.1.3: address-literal requires a closing "]"
 TEST(Email, invalid_ipv4_missing_close_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.3.4"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.3.4"));
 }
 
 // RFC 5321 §4.1.2 Domain: "]" is not in Ldh-str, so unbracketed forms fail
 TEST(Email, invalid_ipv4_missing_open_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@1.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: no content is permitted after the closing "]"
 TEST(Email, invalid_ipv4_trailing_garbage) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[1.2.3.4]x"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[1.2.3.4]x"));
 }
 
 // RFC 5321 §4.1.2 Domain: "[" is not in Ldh-str, so embedded brackets fail
 TEST(Email, invalid_ipv4_leading_garbage) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@x[1.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@x[1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3: address-literal requires at least one of IPv4, IPv6, or
 // General-address-literal between the brackets
 TEST(Email, invalid_ipv4_empty_brackets) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[]"));
 }
 
 // RFC 5321 §4.1.3: IPv6-address-literal = "IPv6:" IPv6-addr, loopback form
 TEST(Email, valid_ipv6_literal_loopback) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:::1]"));
 }
 
 // RFC 4291 §2.2: "::" compresses an all-zeros address
 TEST(Email, valid_ipv6_literal_all_zeros) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:::]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:::]"));
 }
 
 // RFC 5321 §4.1.3: IPv6-address-literal with one compressed group
 TEST(Email, valid_ipv6_literal_typical) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:2001:db8::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:2001:db8::1]"));
 }
 
 // RFC 4291 §2.2: IPv6-full form with eight 1-4 hex groups
 TEST(Email, valid_ipv6_literal_full_form) {
   EXPECT_TRUE(sourcemeta::core::is_email(
       "a@[IPv6:2001:0db8:0000:0000:0000:0000:0000:0001]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(
+      "a@[IPv6:2001:0db8:0000:0000:0000:0000:0000:0001]"));
 }
 
 // RFC 4291 §2.5.6: link-local prefix fe80::/10
 TEST(Email, valid_ipv6_literal_link_local) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:fe80::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:fe80::1]"));
 }
 
 // RFC 4291 §2.2: IPv4-mapped IPv6 address form
 TEST(Email, valid_ipv6_literal_v4_mapped) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:::ffff:192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:::ffff:192.168.1.1]"));
 }
 
 // RFC 4291 §2.5.5: IPv4-compatible IPv6 address form
 TEST(Email, valid_ipv6_literal_v4_compat) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:::192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:::192.168.1.1]"));
 }
 
 // RFC 5321 §4.1.3: without the "IPv6:" tag the literal is parsed as
@@ -729,22 +878,26 @@ TEST(Email, valid_ipv6_literal_v4_compat) {
 // because ":" is in dcontent (%d58 is within %d33-90)
 TEST(Email, valid_no_ipv6_prefix_as_general) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[2001:db8::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[2001:db8::1]"));
 }
 
 // RFC 5234 §2.3: ABNF literal strings are case-insensitive by default, so the
 // "IPv6:" prefix matches "ipv6:"
 TEST(Email, valid_lowercase_ipv6_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[ipv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[ipv6:::1]"));
 }
 
 // RFC 5234 §2.3: case-insensitive literal also matches "IPV6:"
 TEST(Email, valid_uppercase_ipv6_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPV6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPV6:::1]"));
 }
 
 // RFC 5234 §2.3: mixed-case prefix also matches the IPv6 tag
 TEST(Email, valid_mixed_case_ipv6_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[iPv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[iPv6:::1]"));
 }
 
 // RFC 5234 §3.2: ABNF alternatives are unordered. The literal five-byte
@@ -752,152 +905,182 @@ TEST(Email, valid_mixed_case_ipv6_literal) {
 // through to General-address-literal with tag "IPv6" and content ":1"
 TEST(Email, valid_ipv6_prefix_no_colon_as_general) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6::1]"));
 }
 
 // RFC 5234 §3.2: a failed IPv6-addr match falls through to General-address-
 // literal with tag "IPv6" and content "not-an-address" (all dcontent)
 TEST(Email, valid_ipv6_body_garbage_as_general) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:not-an-address]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:not-an-address]"));
 }
 
 // RFC 5321 §4.1.3: IPv6-addr requires at least one group
 TEST(Email, invalid_ipv6_body_empty) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[IPv6:]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[IPv6:]"));
 }
 
 // RFC 5234 §3.2: nine groups fail IPv6-addr but the input still matches
 // General-address-literal with tag "IPv6" and content "1:2:3:4:5:6:7:8:9"
 TEST(Email, valid_ipv6_too_many_groups_as_general) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv6:1:2:3:4:5:6:7:8:9]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv6:1:2:3:4:5:6:7:8:9]"));
 }
 
 // RFC 5321 §4.1.3: address-literal needs a closing "]"
 TEST(Email, invalid_ipv6_missing_close_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[IPv6:::1"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[IPv6:::1"));
 }
 
 // RFC 5321 §4.1.3: no content is permitted after the closing "]"
 TEST(Email, invalid_ipv6_trailing_garbage) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[IPv6:::1]x"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[IPv6:::1]x"));
 }
 
 // RFC 5321 §4.1.3: General-address-literal = Standardized-tag ":" 1*dcontent
 TEST(Email, valid_general_literal_minimal) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[X:y]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[X:y]"));
 }
 
 // RFC 5321 §4.1.3: typical X400 tag from the IANA Standardized-tag registry
 TEST(Email, valid_general_literal_x400) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[X400:foo]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[X400:foo]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str body permits DIGIT before the terminal Let-dig
 TEST(Email, valid_general_literal_tag_with_digits) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[tag1:foo]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[tag1:foo]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str body permits interior "-"
 TEST(Email, valid_general_literal_tag_with_interior_hyphen) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[tag-name:foo]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[tag-name:foo]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig permits a
 // leading "-" because Standardized-tag is Ldh-str, not Let-dig [Ldh-str]
 TEST(Email, valid_general_literal_tag_leading_hyphen) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[-tag:foo]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[-tag:foo]"));
 }
 
 // RFC 5321 §4.1.3: dcontent starts at %d33 "!"
 TEST(Email, valid_general_literal_dcontent_lower_bound) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[Tag:!]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[Tag:!]"));
 }
 
 // RFC 5321 §4.1.3: dcontent %d33-90 ends at "Z"
 TEST(Email, valid_general_literal_dcontent_upper_first_range) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[Tag:Z]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[Tag:Z]"));
 }
 
 // RFC 5321 §4.1.3: dcontent %d94-126 starts at "^"
 TEST(Email, valid_general_literal_dcontent_lower_second_range) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[Tag:^]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[Tag:^]"));
 }
 
 // RFC 5321 §4.1.3: dcontent ends at "~" (%d126)
 TEST(Email, valid_general_literal_dcontent_upper_bound) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[Tag:~]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[Tag:~]"));
 }
 
 // RFC 5321 §4.1.3: dcontent %d33-90 includes ":" (%d58)
 TEST(Email, valid_general_literal_dcontent_with_colon) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[Tag:foo:bar]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[Tag:foo:bar]"));
 }
 
 // RFC 5321 §4.1.3: 1*dcontent permits long content
 TEST(Email, valid_general_literal_dcontent_long) {
   EXPECT_TRUE(
       sourcemeta::core::is_email("a@[Tag:" + std::string(200, 'a') + "]"));
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("a@[Tag:" + std::string(200, 'a') + "]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str must end with Let-dig, trailing "-" is invalid
 TEST(Email, invalid_general_tag_trailing_hyphen) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[tag-:x]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[tag-:x]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str requires a terminal Let-dig, lone "-" is invalid
 TEST(Email, invalid_general_tag_single_hyphen) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[-:x]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[-:x]"));
 }
 
 // RFC 5321 §4.1.2: Ldh-str alphabet excludes "_"
 TEST(Email, invalid_general_tag_with_underscore) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[tag_name:x]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[tag_name:x]"));
 }
 
 // RFC 5321 §4.1.2: Standardized-tag = Ldh-str, minimum length is one byte
 TEST(Email, invalid_general_tag_empty) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[:x]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[:x]"));
 }
 
 // RFC 5321 §4.1.3: General-address-literal = tag ":" 1*dcontent, empty content
 // is invalid
 TEST(Email, invalid_general_empty_dcontent) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:]"));
 }
 
 // RFC 5321 §4.1.3: General-address-literal requires ":" between tag and
 // content
 TEST(Email, invalid_general_no_colon) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400foo]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400foo]"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes "[" (%d91)
 TEST(Email, invalid_general_dcontent_lbracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:[]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:[]"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes "\\" (%d92)
 TEST(Email, invalid_general_dcontent_backslash) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:\\]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:\\]"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes "]" (%d93)
 TEST(Email, invalid_general_dcontent_rbracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:a]b]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:a]b]"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes SP (%d32)
 TEST(Email, invalid_general_dcontent_space) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:a b]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:a b]"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes controls (%d0-31)
 TEST(Email, invalid_general_dcontent_control) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:\x01]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:\x01]"));
 }
 
 // RFC 5321 §4.1.3: dcontent is ASCII, bytes >= 0x80 are excluded
 TEST(Email, invalid_general_dcontent_high_bit) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[X400:\x80]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[X400:\x80]"));
 }
 
 // RFC 5321 §4.5.3.1: 64-byte Local-part plus "@" plus a 254-byte Domain still
@@ -907,12 +1090,17 @@ TEST(Email, valid_max_length_email) {
       std::string(64, 'a') + "@" + std::string(63, 'b') + "." +
       std::string(63, 'c') + "." + std::string(63, 'd') + "." +
       std::string(62, 'e')));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(
+      std::string(64, 'a') + "@" + std::string(63, 'b') + "." +
+      std::string(63, 'c') + "." + std::string(63, 'd') + "." +
+      std::string(62, 'e')));
 }
 
 // RFC 5321 §4.5.3.1.1: 65-byte Local-part exceeds the cap even with a valid
 // Domain
 TEST(Email, invalid_local_65_with_valid_domain) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string(65, 'a') + "@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(std::string(65, 'a') + "@b"));
 }
 
 // RFC 5321 §4.5.3.1.2: a Domain exceeding the cap is rejected even with a
@@ -922,37 +1110,47 @@ TEST(Email, invalid_local_64_with_domain_over_cap) {
       std::string(64, 'a') + "@" + std::string(63, 'b') + "." +
       std::string(63, 'c') + "." + std::string(63, 'd') + "." +
       std::string(63, 'e') + ".f"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(
+      std::string(64, 'a') + "@" + std::string(63, 'b') + "." +
+      std::string(63, 'c') + "." + std::string(63, 'd') + "." +
+      std::string(63, 'e') + ".f"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Quoted-string Local-part with IPv4 address-literal
 TEST(Email, valid_quoted_local_with_ipv4_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"foo\"@[192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"foo\"@[192.168.1.1]"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Quoted-string Local-part with IPv6 address-literal
 TEST(Email, valid_quoted_local_with_ipv6_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"foo\"@[IPv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"foo\"@[IPv6:::1]"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Quoted-string Local-part with General-address-
 // literal
 TEST(Email, valid_quoted_local_with_general_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"foo\"@[X400:bar]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"foo\"@[X400:bar]"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Dot-string Local-part with IPv4 address-literal
 TEST(Email, valid_dot_string_with_ipv4_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("foo@[192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("foo@[192.168.1.1]"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Dot-string Local-part with IPv6 address-literal
 TEST(Email, valid_dot_string_with_ipv6_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("foo@[IPv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("foo@[IPv6:::1]"));
 }
 
 // RFC 5321 §4.1.2 + §4.1.3: Dot-string Local-part with General-address-literal
 TEST(Email, valid_dot_string_with_general_literal) {
   EXPECT_TRUE(sourcemeta::core::is_email("foo@[X400:bar]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("foo@[X400:bar]"));
 }
 
 // RFC 5321 §4.5.3.1.2: an address-literal whose total length equals the
@@ -960,6 +1158,8 @@ TEST(Email, valid_dot_string_with_general_literal) {
 TEST(Email, valid_address_literal_length_255) {
   EXPECT_TRUE(
       sourcemeta::core::is_email("a@[X:" + std::string(251, 'a') + "]"));
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("a@[X:" + std::string(251, 'a') + "]"));
 }
 
 // RFC 5321 §4.5.3.1.2: an address-literal one octet past the 255-octet cap is
@@ -967,164 +1167,202 @@ TEST(Email, valid_address_literal_length_255) {
 TEST(Email, invalid_address_literal_length_256) {
   EXPECT_FALSE(
       sourcemeta::core::is_email("a@[X:" + std::string(252, 'a') + "]"));
+  EXPECT_FALSE(
+      sourcemeta::core::is_idn_email("a@[X:" + std::string(252, 'a') + "]"));
 }
 
 // RFC 5321 §4.1.2: Mailbox cannot be empty
 TEST(Email, invalid_empty_input) {
   EXPECT_FALSE(sourcemeta::core::is_email(""));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(""));
 }
 
 // RFC 5321 §4.1.2: SP is not in atext, qtextSMTP, or Ldh-str outside a
 // Quoted-string
 TEST(Email, invalid_whitespace_only) {
   EXPECT_FALSE(sourcemeta::core::is_email("   "));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("   "));
 }
 
 // RFC 5321 §4.1.2: leading SP is not part of Dot-string or Quoted-string
 TEST(Email, invalid_leading_space) {
   EXPECT_FALSE(sourcemeta::core::is_email(" a@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(" a@b"));
 }
 
 // RFC 5321 §4.1.2 Domain: trailing SP is not in Ldh-str
 TEST(Email, invalid_trailing_space) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b "));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b "));
 }
 
 // RFC 5321 §4.1.2 Domain: NUL is not in Ldh-str
 TEST(Email, invalid_nul_in_domain) {
-  EXPECT_FALSE(sourcemeta::core::is_email(std::string_view{"a@b\x00c", 5}));
+  EXPECT_FALSE(sourcemeta::core::is_email(std::string_view{"a@b\0c", 5}));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(std::string_view{"a@b\0c", 5}));
 }
 
 // RFC 5321 §4.1.2: CRLF bytes are not in the Mailbox alphabet
 TEST(Email, invalid_crlf) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b\r\n"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b\r\n"));
 }
 
 // RFC 5321 §4.1.2: LF is not in the Mailbox alphabet
-TEST(Email, invalid_lf) { EXPECT_FALSE(sourcemeta::core::is_email("a@b\n")); }
+TEST(Email, invalid_lf) {
+  EXPECT_FALSE(sourcemeta::core::is_email("a@b\n"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b\n"));
+}
 
 // RFC 5321 §4.1.2: TAB is not in atext or Ldh-str
 TEST(Email, invalid_tab_in_local) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\tb@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\tb@c"));
 }
 
 // RFC 5321 §4.1.2 Domain: TAB is not in Ldh-str
 TEST(Email, invalid_tab_in_domain) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b\tc"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b\tc"));
 }
 
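-// RFC 5321 §4.1.2: two consecutive "@" produce an empty Local-part on the
-// left and a Dot-string Atom on the right that contains "@"
+// RFC 5321 §4.1.2: "@" is in neither atext nor Ldh-str, so whichever "@" is
+// taken as the boundary, the other one invalidates the adjacent part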
 TEST(Email, invalid_consecutive_at_signs) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@@b"));
 }
 
 // RFC 5321 §4.1.2: a single Quoted-string byte must still be followed by the
 // "@" boundary and a Domain
 TEST(Email, invalid_quoted_only) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\""));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\""));
 }
 
 // RFC 5321 §4.1.2: a lone DQUOTE is an unterminated Quoted-string
 TEST(Email, invalid_lone_dquote) {
   EXPECT_FALSE(sourcemeta::core::is_email("\""));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\""));
 }
 
 // RFC 5321 §4.1.2: a lone "@" has neither Local-part nor Domain
-TEST(Email, invalid_lone_at) { EXPECT_FALSE(sourcemeta::core::is_email("@")); }
+TEST(Email, invalid_lone_at) {
+  EXPECT_FALSE(sourcemeta::core::is_email("@"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("@"));
+}
 
 // RFC 5321 §4.1.2: minimum-length Mailbox is a single atext byte plus "@"
 // plus a single Let-dig byte
 TEST(Email, valid_minimum_length_mailbox) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b"));
 }
 
 // RFC 5321 §4.1.3: a single dcontent byte is the minimum 1*dcontent
 TEST(Email, valid_general_literal_minimum_dcontent) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[A:B]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[A:B]"));
 }
 
 // RFC 5321 §4.1.2 ALPHA upper bound: "Z" (%d90) is in atext
 TEST(Email, valid_atext_alpha_upper_Z) {
   EXPECT_TRUE(sourcemeta::core::is_email("Z@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("Z@b"));
 }
 
 // RFC 5321 §4.1.2 ALPHA upper bound: "z" (%d122) is in atext
 TEST(Email, valid_atext_alpha_lower_z) {
   EXPECT_TRUE(sourcemeta::core::is_email("z@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("z@b"));
 }
 
 // RFC 5321 §4.1.2 DIGIT upper bound: "9" (%d57) is in atext
 TEST(Email, valid_atext_digit_nine) {
   EXPECT_TRUE(sourcemeta::core::is_email("9@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("9@b"));
 }
 
 // RFC 5321 §4.1.2: Atom = 1*atext, an all-digit atom is permitted
 TEST(Email, valid_dot_string_numeric_atom) {
   EXPECT_TRUE(sourcemeta::core::is_email("123@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("123@b"));
 }
 
 // RFC 5321 §4.1.2: atext mixes letters, digits, and specials in one atom
 TEST(Email, valid_dot_string_alpha_digit_special_mix) {
   EXPECT_TRUE(sourcemeta::core::is_email("aB1!c2#@d"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("aB1!c2#@d"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP %d93-126 excludes %d127 (DEL)
 TEST(Email, invalid_quoted_qtext_del_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"\x7f\"@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\x7f\"@b"));
 }
 
 // RFC 5321 §4.1.2: qtextSMTP excludes %d9 (TAB)
 TEST(Email, invalid_quoted_qtext_tab) {
   EXPECT_FALSE(sourcemeta::core::is_email("\"a\tb\"@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"a\tb\"@c"));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body excludes NUL (%d0)
 TEST(Email, invalid_quoted_pair_nul_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string_view{"\"\\\x00\"@b", 6}));
+  EXPECT_FALSE(
+      sourcemeta::core::is_idn_email(std::string_view{"\"\\\x00\"@b", 6}));
 }
 
 // RFC 5321 §4.1.2: quoted-pairSMTP body permits "@" (%d64)
 TEST(Email, valid_quoted_pair_at_sign) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\@\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\@\"@b"));
 }
 
 // RFC 5321 §4.1.2: two consecutive quoted-pairs back-to-back inside a
 // Quoted-string
 TEST(Email, valid_quoted_two_consecutive_pairs) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"\\\\\\\"\"@b"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\\\\\\\"\"@b"));
 }
 
 // RFC 5321 §4.1.2 Domain: a single Let-dig digit is a valid sub-domain
 TEST(Email, valid_domain_single_digit) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@1"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@1"));
 }
 
 // RFC 5321 §4.1.2: a single atext byte is not a valid Mailbox without "@"
 TEST(Email, invalid_single_atext_no_at) {
   EXPECT_FALSE(sourcemeta::core::is_email("a"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a"));
 }
 
 // RFC 5321 §4.1.2 Domain: a stray "]" with no opening "[" cannot match
 // address-literal and "]" is not in Ldh-str
 TEST(Email, invalid_unbalanced_closing_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b]"));
 }
 
 // RFC 5321 §4.1.2 Domain: "[" embedded in a Domain is not in Ldh-str
 TEST(Email, invalid_bracket_in_middle_of_domain) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b[c]d"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b[c]d"));
 }
 
 // RFC 5321 §4.1.3: a domain consisting of just "[" never closes the
 // address-literal
 TEST(Email, invalid_domain_just_open_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@["));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@["));
 }
 
 // RFC 5321 §4.1.2 Domain: a domain consisting of just "]" is not Ldh-str
 TEST(Email, invalid_domain_just_close_bracket) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@]"));
 }
 
 // RFC 5321 §4.1.3: "::1" between brackets has no "IPv6:" prefix and an empty
@@ -1132,98 +1370,117 @@ TEST(Email, invalid_domain_just_close_bracket) {
 // reject it
 TEST(Email, invalid_bracket_just_ipv6_addr_no_prefix) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[::1]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[::1]"));
 }
 
 // RFC 5321 §4.1.3: a bracketed word without ":" cannot match General, and
 // without digits cannot match IPv4
 TEST(Email, invalid_bracket_with_plain_word) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[hello]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[hello]"));
 }
 
 // RFC 5321 §4.1.3: leading SP inside the brackets fails IPv4 (non-digit) and
 // fails General (Standardized-tag has no SP)
 TEST(Email, invalid_bracket_with_leading_space) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[ 1.2.3.4]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[ 1.2.3.4]"));
 }
 
 // RFC 5321 §4.1.3 + RFC 5234 §3.2: a case-insensitive "IPv6:" match that
 // fails IPv6-addr still falls through to General-address-literal
 TEST(Email, valid_lowercase_ipv6_fallthrough_to_general) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[ipv6:not-an-address]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[ipv6:not-an-address]"));
 }
 
 // RFC 5321 §4.1.2 Ldh-str: leading "-" before another "-" before Let-dig is
 // still a valid Ldh-str
 TEST(Email, valid_general_literal_multiple_leading_hyphens) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[--a:b]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[--a:b]"));
 }
 
 // RFC 5321 §4.1.3: any Ldh-str is a valid Standardized-tag per the grammar,
 // including ones not registered with IANA such as "IPv7"
 TEST(Email, valid_general_literal_ipv7_like_tag) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[IPv7:foo]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[IPv7:foo]"));
 }
 
 // RFC 5321 §4.1.2 Ldh-str alphabet excludes ".", so a tag with "." cannot
 // match Standardized-tag
 TEST(Email, invalid_general_tag_with_dot) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[a.b:c]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[a.b:c]"));
 }
 
 // RFC 5321 §4.1.3: General-address-literal content of just ":" is a single
 // dcontent byte (%d58 is in %d33-90)
 TEST(Email, valid_general_literal_content_just_colon) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@[a::]"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@[a::]"));
 }
 
 // RFC 5321 §4.1.3 + RFC 5234 §3.2: bracketed input where the first colon
 // produces an empty tag fails the Standardized-tag rule
 TEST(Email, invalid_bracket_empty_tag) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[:foo]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[:foo]"));
 }
 
 // RFC 5321 §4.1.2 Domain: an address-literal whose Domain branch tries
 // is_hostname must reject a stray "[" inside what would otherwise be Ldh-str
 TEST(Email, invalid_domain_open_bracket_inside) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@b[c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b[c"));
 }
 
 // RFC 5321 §4.1.2 Dot-string: a Quoted-string opener "\"" inside an
 // otherwise Dot-string Local-part is not in atext
 TEST(Email, invalid_dquote_inside_dot_string) {
   EXPECT_FALSE(sourcemeta::core::is_email("a\"b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a\"b@c"));
 }
 
 // RFC 5321 §4.1.2 Mailbox: a Mailbox cannot start with the boundary "@"
 TEST(Email, invalid_starts_with_at) {
   EXPECT_FALSE(sourcemeta::core::is_email("@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("@example.com"));
 }
 
 // RFC 5321 §4.1.2 Mailbox: a Mailbox cannot end with the boundary "@"
 TEST(Email, invalid_ends_with_at) {
   EXPECT_FALSE(sourcemeta::core::is_email("user@"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("user@"));
 }
 
 // RFC 5321 §4.1.2: Local-part = 64 octets via Dot-string that includes "."
 TEST(Email, valid_local_part_length_64_with_dots) {
   EXPECT_TRUE(sourcemeta::core::is_email(std::string(31, 'a') + "." +
                                          std::string(32, 'b') + "@c"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(std::string(31, 'a') + "." +
+                                             std::string(32, 'b') + "@c"));
 }
 
 // RFC 5321 §4.5.3.1.1: 65-octet Dot-string Local-part that contains "."
 TEST(Email, invalid_local_part_length_65_with_dots) {
   EXPECT_FALSE(sourcemeta::core::is_email(std::string(32, 'a') + "." +
                                           std::string(32, 'b') + "@c"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(std::string(32, 'a') + "." +
+                                              std::string(32, 'b') + "@c"));
 }
 
 // RFC 5321 §4.1.2: a Domain consisting of many short labels still parses
 TEST(Email, valid_domain_many_short_labels) {
   EXPECT_TRUE(sourcemeta::core::is_email("a@a.b.c.d.e.f.g.h.i.j.k"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@a.b.c.d.e.f.g.h.i.j.k"));
 }
 
 // RFC 5321 §4.1.3: dcontent excludes DEL (%d127)
 TEST(Email, invalid_general_dcontent_del_byte) {
   EXPECT_FALSE(sourcemeta::core::is_email("a@[Tag:\x7f]"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@[Tag:\x7f]"));
 }
 
 // RFC 5321 §4.1.3 + §4.5.3.1.2: a General-address-literal whose Domain total
@@ -1232,6 +1489,8 @@ TEST(Email, invalid_general_dcontent_del_byte) {
 TEST(Email, valid_general_literal_inner_at_cap) {
   EXPECT_TRUE(
       sourcemeta::core::is_email("a@[Tag:" + std::string(249, 'x') + "]"));
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("a@[Tag:" + std::string(249, 'x') + "]"));
 }
 
 // RFC 5321 §4.5.3.1.2: General-address-literal one octet past the 255-octet
@@ -1239,15 +1498,19 @@ TEST(Email, valid_general_literal_inner_at_cap) {
 TEST(Email, invalid_general_literal_inner_over_cap) {
   EXPECT_FALSE(
       sourcemeta::core::is_email("a@[Tag:" + std::string(250, 'x') + "]"));
+  EXPECT_FALSE(
+      sourcemeta::core::is_idn_email("a@[Tag:" + std::string(250, 'x') + "]"));
 }
 
 // RFC 5321 §4.1.2: a single quoted byte plus minimal Domain
 TEST(Email, valid_quoted_single_letter_then_minimal_domain) {
   EXPECT_TRUE(sourcemeta::core::is_email("\"x\"@y"));
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"x\"@y"));
 }
 
 // RFC 5321 §4.1.2: Dot-string ending with the boundary "@" right after the
 // dot has no terminating Atom
 TEST(Email, invalid_dot_then_at) {
   EXPECT_FALSE(sourcemeta::core::is_email("a.@b"));
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a.@b"));
 }
diff --git a/test/email/idn_email_test.cc b/test/email/idn_email_test.cc
new file mode 100644
index 0000000000..51769b1514
--- /dev/null
+++ b/test/email/idn_email_test.cc
@@ -0,0 +1,399 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/email.h>
+
+#include <string>
+
+// example@example.test rendered in Hangul (RFC 6531 §3.3)
+// Bytes: 실=EC8BA4 례=EBA180 @=40 실=EC8BA4 례=EBA180 .=2E 테=ED858C 스=EC8AA4
+// 트=ED8AB8
+TEST(IdnEmail, valid_hangul_example_at_example_test) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(
+      "\xec\x8b\xa4\xeb\xa1\x80"
+      "@"
+      "\xec\x8b\xa4\xeb\xa1\x80.\xed\x85\x8c\xec\x8a\xa4\xed\x8a\xb8"));
+  EXPECT_FALSE(sourcemeta::core::is_email(
+      "\xec\x8b\xa4\xeb\xa1\x80"
+      "@"
+      "\xec\x8b\xa4\xeb\xa1\x80.\xed\x85\x8c\xec\x8a\xa4\xed\x8a\xb8"));
+}
+
+TEST(IdnEmail, invalid_bare_number) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("2962"));
+  EXPECT_FALSE(sourcemeta::core::is_email("2962"));
+}
+
+TEST(IdnEmail, valid_typical_ascii_address) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("joe.bloggs@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("joe.bloggs@example.com"));
+}
+
+// RFC 5321 §4.1.2: ASCII Dot-string is a subset of the extended grammar
+TEST(IdnEmail, valid_ascii_single_atom) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@b"));
+  EXPECT_TRUE(sourcemeta::core::is_email("a@b"));
+}
+
+TEST(IdnEmail, valid_ascii_two_atoms) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a.b@c"));
+  EXPECT_TRUE(sourcemeta::core::is_email("a.b@c"));
+}
+
+TEST(IdnEmail, valid_ascii_many_atoms) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a.b.c.d@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("a.b.c.d@example.com"));
+}
+
+TEST(IdnEmail, valid_ascii_atext_symbols) {
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("a!#$%&'*+-/=?^_`{|}~@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("a!#$%&'*+-/=?^_`{|}~@example.com"));
+}
+
+TEST(IdnEmail, valid_ascii_uppercase_local) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("ABC@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("ABC@example.com"));
+}
+
+TEST(IdnEmail, valid_ascii_digit_local) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("123@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("123@example.com"));
+}
+
+// RFC 6531 §3.3: atext =/ UTF8-non-ascii (2-byte: U+03B1 GREEK SMALL ALPHA)
+TEST(IdnEmail, valid_local_two_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\xce\xb1@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xce\xb1@b"));
+}
+
+// RFC 6531 §3.3: atext =/ UTF8-non-ascii (3-byte: U+4E2D CJK 中)
+TEST(IdnEmail, valid_local_three_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\xe4\xb8\xad@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xe4\xb8\xad@b"));
+}
+
+// RFC 6531 §3.3: atext =/ UTF8-non-ascii (4-byte: U+1F600 GRINNING FACE)
+TEST(IdnEmail, valid_local_four_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\xf0\x9f\x98\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xf0\x9f\x98\x80@b"));
+}
+
+TEST(IdnEmail, valid_local_mixed_ascii_and_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("user.\xce\xb1@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("user.\xce\xb1@example.com"));
+}
+
+TEST(IdnEmail, valid_local_multi_atom_with_utf8) {
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("\xe4\xb8\xad.\xce\xb1.user@example.com"));
+  EXPECT_FALSE(
+      sourcemeta::core::is_email("\xe4\xb8\xad.\xce\xb1.user@example.com"));
+}
+
+TEST(IdnEmail, valid_local_utf8_only_two_atoms) {
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("\xce\xb1.\xe4\xb8\xad@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xce\xb1.\xe4\xb8\xad@example.com"));
+}
+
+// RFC 6531 §3.3: sub-domain =/ U-label (2-byte U-label only)
+TEST(IdnEmail, valid_domain_two_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@\xce\xb1"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xce\xb1"));
+}
+
+// RFC 6531 §3.3: sub-domain =/ U-label (3-byte U-label only)
+TEST(IdnEmail, valid_domain_three_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@\xe4\xb8\xad"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xe4\xb8\xad"));
+}
+
+TEST(IdnEmail, valid_domain_mixed_labels) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@example.\xce\xb1.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@example.\xce\xb1.com"));
+}
+
+TEST(IdnEmail, valid_domain_utf8_with_hyphen) {
+  // U-labels may contain hyphens; just not at the start/end of a label
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@\xce\xb1-\xe4\xb8\xad"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xce\xb1-\xe4\xb8\xad"));
+}
+
+TEST(IdnEmail, valid_domain_many_labels) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@a.b.c.\xce\xb1.d.e"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@a.b.c.\xce\xb1.d.e"));
+}
+
+// RFC 5321 §4.1.2: Quoted-string with ASCII-only content
+TEST(IdnEmail, valid_quoted_ascii) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a b\"@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("\"a b\"@example.com"));
+}
+
+// RFC 6531 §3.3: qtextSMTP =/ UTF8-non-ascii
+TEST(IdnEmail, valid_quoted_with_two_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\xce\xb1\"@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\"\xce\xb1\"@example.com"));
+}
+
+TEST(IdnEmail, valid_quoted_with_three_byte_utf8) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"\xe4\xb8\xad\"@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\"\xe4\xb8\xad\"@example.com"));
+}
+
+TEST(IdnEmail, valid_quoted_with_four_byte_utf8) {
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("\"\xf0\x9f\x98\x80\"@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\"\xf0\x9f\x98\x80\"@example.com"));
+}
+
+TEST(IdnEmail, valid_quoted_mixed_ascii_and_utf8) {
+  EXPECT_TRUE(
+      sourcemeta::core::is_idn_email("\"\xce\xb1 \xe4\xb8\xad\"@example.com"));
+  EXPECT_FALSE(
+      sourcemeta::core::is_email("\"\xce\xb1 \xe4\xb8\xad\"@example.com"));
+}
+
+TEST(IdnEmail, valid_quoted_with_quoted_pair) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\"a\\\"b\"@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email("\"a\\\"b\"@example.com"));
+}
+
+// RFC 5321 §4.1.3: address-literal IPv4 stays ASCII (no IDNA applies)
+TEST(IdnEmail, valid_address_literal_ipv4) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("user@[192.168.1.1]"));
+  EXPECT_TRUE(sourcemeta::core::is_email("user@[192.168.1.1]"));
+}
+
+TEST(IdnEmail, valid_address_literal_ipv6) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("user@[IPv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_email("user@[IPv6:::1]"));
+}
+
+TEST(IdnEmail, valid_address_literal_ipv6_lowercase_tag) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("user@[ipv6:::1]"));
+  EXPECT_TRUE(sourcemeta::core::is_email("user@[ipv6:::1]"));
+}
+
+TEST(IdnEmail, valid_address_literal_with_utf8_local) {
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("\xce\xb1@[192.168.1.1]"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xce\xb1@[192.168.1.1]"));
+}
+
+// RFC 5321 §4.5.3.1.1: Local-part is allowed up to 64 octets
+TEST(IdnEmail, valid_local_at_octet_limit) {
+  const std::string local(64, 'a');
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(local + "@example.com"));
+  EXPECT_TRUE(sourcemeta::core::is_email(local + "@example.com"));
+}
+
+TEST(IdnEmail, valid_local_at_octet_limit_with_utf8) {
+  // Greek alpha is CE B1 (2 octets): 21 of them plus 22 ASCII 'a' is 64 octets
+  std::string local;
+  for (auto count = 0; count < 21; ++count) {
+    local += "\xce\xb1";
+  }
+  local += std::string(22, 'a');
+  EXPECT_EQ(local.size(), 64u);
+  EXPECT_TRUE(sourcemeta::core::is_idn_email(local + "@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email(local + "@example.com"));
+}
+
+TEST(IdnEmail, invalid_missing_at) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("plain"));
+  EXPECT_FALSE(sourcemeta::core::is_email("plain"));
+}
+
+TEST(IdnEmail, invalid_empty_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("@example.com"));
+}
+
+TEST(IdnEmail, invalid_empty_domain) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("user@"));
+  EXPECT_FALSE(sourcemeta::core::is_email("user@"));
+}
+
+TEST(IdnEmail, invalid_empty) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(""));
+  EXPECT_FALSE(sourcemeta::core::is_email(""));
+}
+
+TEST(IdnEmail, invalid_two_at_signs) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@b@c"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@b@c"));
+}
+
+TEST(IdnEmail, invalid_local_leading_dot) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(".user@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email(".user@example.com"));
+}
+
+TEST(IdnEmail, invalid_local_trailing_dot) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("user.@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("user.@example.com"));
+}
+
+TEST(IdnEmail, invalid_local_consecutive_dots) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a..b@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a..b@example.com"));
+}
+
+TEST(IdnEmail, invalid_local_just_dot) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(".@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email(".@example.com"));
+}
+
+// RFC 6532 §3.1: a lone continuation byte (0xBF) cannot start a UTF-8 sequence
+TEST(IdnEmail, invalid_lone_continuation_byte) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xbf@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xbf@b"));
+}
+
+// RFC 6532 §3.1: 2-byte starter with no continuation byte
+TEST(IdnEmail, invalid_truncated_two_byte_at_end_of_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xce@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xce@b"));
+}
+
+// RFC 6532 §3.1: %xE0 %x80-9F is overlong (codepoints < U+0800)
+TEST(IdnEmail, invalid_overlong_three_byte) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xe0\x80\xa0@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xe0\x80\xa0@b"));
+}
+
+// RFC 6532 §3.1: U+D800 surrogate is forbidden
+TEST(IdnEmail, invalid_surrogate_codepoint_in_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xed\xa0\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xed\xa0\x80@b"));
+}
+
+// RFC 6532 §3.1: codepoints above U+10FFFF are forbidden
+TEST(IdnEmail, invalid_above_max_codepoint_in_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xf4\x90\x80\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xf4\x90\x80\x80@b"));
+}
+
+// RFC 6532 §3.1: 4-byte starter with truncated continuation
+TEST(IdnEmail, invalid_truncated_four_byte_in_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xf0\x9f\x98@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xf0\x9f\x98@b"));
+}
+
+// RFC 6532 §3.1: %xC0 is a forbidden lead byte (overlong U+0000)
+TEST(IdnEmail, invalid_overlong_c0_in_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xc0\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xc0\x80@b"));
+}
+
+// RFC 6532 §3.1: %xF5 is not a valid lead byte
+TEST(IdnEmail, invalid_lead_f5_in_local) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\xf5\x80\x80\x80@b"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\xf5\x80\x80\x80@b"));
+}
+
+TEST(IdnEmail, invalid_invalid_utf8_in_domain) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@\xc0\x80"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xc0\x80"));
+}
+
+TEST(IdnEmail, invalid_surrogate_in_domain) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@\xed\xa0\x80"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xed\xa0\x80"));
+}
+
+TEST(IdnEmail, invalid_lone_continuation_in_domain) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@\xbf"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xbf"));
+}
+
+TEST(IdnEmail, invalid_invalid_utf8_in_quoted) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\xc0\x80\"@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\"\xc0\x80\"@example.com"));
+}
+
+TEST(IdnEmail, invalid_truncated_utf8_in_quoted) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("\"\xce\"@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("\"\xce\"@example.com"));
+}
+
+// RFC 6531 §3.3: domain label cannot start with a hyphen
+TEST(IdnEmail, invalid_domain_leading_hyphen) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@-example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@-example.com"));
+}
+
+// RFC 6531 §3.3: domain label cannot end with a hyphen
+TEST(IdnEmail, invalid_domain_trailing_hyphen) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@example-.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@example-.com"));
+}
+
+TEST(IdnEmail, invalid_domain_label_trailing_hyphen_with_utf8) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@\xce\xb1-"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@\xce\xb1-"));
+}
+
+TEST(IdnEmail, invalid_domain_trailing_dot) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@example.com."));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@example.com."));
+}
+
+TEST(IdnEmail, invalid_domain_empty_label_in_middle) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@example..com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@example..com"));
+}
+
+TEST(IdnEmail, invalid_domain_leading_dot) {
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@.example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@.example.com"));
+}
+
+// RFC 5321 §4.5.3.1.1: Local-part > 64 octets is invalid
+TEST(IdnEmail, invalid_local_one_over_octet_limit) {
+  const std::string local(65, 'a');
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(local + "@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email(local + "@example.com"));
+}
+
+TEST(IdnEmail, invalid_local_one_over_octet_limit_with_utf8) {
+  // Greek alpha is CE B1 (2 octets): 21 of them plus 23 ASCII 'a' is 65 octets
+  std::string local;
+  for (auto count = 0; count < 21; ++count) {
+    local += "\xce\xb1";
+  }
+  local += std::string(23, 'a');
+  EXPECT_EQ(local.size(), 65u);
+  EXPECT_FALSE(sourcemeta::core::is_idn_email(local + "@example.com"));
+  EXPECT_FALSE(sourcemeta::core::is_email(local + "@example.com"));
+}
+
+// RFC 1035 §2.3.4: single label > 63 octets is invalid
+TEST(IdnEmail, invalid_domain_label_too_long) {
+  const std::string label(64, 'a');
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@" + label));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@" + label));
+}
+
+TEST(IdnEmail, valid_domain_label_at_max_length) {
+  const std::string label(63, 'a');
+  EXPECT_TRUE(sourcemeta::core::is_idn_email("a@" + label));
+  EXPECT_TRUE(sourcemeta::core::is_email("a@" + label));
+}
+
+// RFC 5321 §4.5.3.1.2: total domain > 255 octets is invalid.
+// Construction must avoid trailing-dot and per-label (>63) confounds: 5
+// labels of 51/51/51/51/48 'a' chars separated by 4 dots = 256 octets, no
+// trailing dot, every label within the 63-octet RFC 1035 cap
+TEST(IdnEmail, invalid_domain_total_too_long) {
+  const std::string label(51, 'a');
+  std::string domain;
+  for (auto count = 0; count < 4; ++count) {
+    domain += label + ".";
+  }
+  domain += std::string(48, 'a');
+  EXPECT_EQ(domain.size(), 256u);
+  EXPECT_NE(domain.back(), '.');
+  EXPECT_FALSE(sourcemeta::core::is_idn_email("a@" + domain));
+  EXPECT_FALSE(sourcemeta::core::is_email("a@" + domain));
+}
diff --git a/test/unicode/CMakeLists.txt b/test/unicode/CMakeLists.txt
index ca829ce501..18ef536775 100644
--- a/test/unicode/CMakeLists.txt
+++ b/test/unicode/CMakeLists.txt
@@ -1,5 +1,13 @@
 sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME unicode
-  SOURCES unicode_test.cc)
+  SOURCES
+    codepoint_to_utf8_test.cc
+    utf8_to_utf32_test.cc
+    utf8_codepoint_length_test.cc
+    utf8_lead_byte_size_test.cc
+    utf8_codepoint_byte_count_test.cc
+    is_utf8_continuation_test.cc
+    is_surrogate_test.cc
+    is_valid_codepoint_test.cc)
 
 target_link_libraries(sourcemeta_core_unicode_unit
   PRIVATE sourcemeta::core::unicode)
diff --git a/test/unicode/codepoint_to_utf8_test.cc b/test/unicode/codepoint_to_utf8_test.cc
new file mode 100644
index 0000000000..f1212cfba0
--- /dev/null
+++ b/test/unicode/codepoint_to_utf8_test.cc
@@ -0,0 +1,125 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+#include <sstream> // std::ostringstream
+#include <string>  // std::string
+
+TEST(Unicode_codepoint_to_utf8, ascii_letter) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x41), "A");
+}
+
+TEST(Unicode_codepoint_to_utf8, ascii_null) {
+  const std::string expected(1, '\0');
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x00), expected);
+}
+
+TEST(Unicode_codepoint_to_utf8, ascii_max) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x7F), "\x7F");
+}
+
+TEST(Unicode_codepoint_to_utf8, two_byte_min) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x80), "\xC2\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, two_byte_latin_e_acute) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0xE9), "\xC3\xA9");
+}
+
+TEST(Unicode_codepoint_to_utf8, two_byte_max) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x7FF), "\xDF\xBF");
+}
+
+TEST(Unicode_codepoint_to_utf8, three_byte_min) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x800), "\xE0\xA0\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, three_byte_cjk) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x4E16), "\xE4\xB8\x96");
+}
+
+TEST(Unicode_codepoint_to_utf8, three_byte_max) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0xFFFF), "\xEF\xBF\xBF");
+}
+
+TEST(Unicode_codepoint_to_utf8, four_byte_min) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x10000), "\xF0\x90\x80\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, four_byte_emoji) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x1F600), "\xF0\x9F\x98\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, four_byte_max) {
+  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x10FFFF), "\xF4\x8F\xBF\xBF");
+}
+
+TEST(Unicode_codepoint_to_utf8, stream_ascii_letter) {
+  std::ostringstream output;
+  sourcemeta::core::codepoint_to_utf8(0x41, output);
+  EXPECT_EQ(output.str(), "A");
+}
+
+TEST(Unicode_codepoint_to_utf8, stream_two_byte_latin_e_acute) {
+  std::ostringstream output;
+  sourcemeta::core::codepoint_to_utf8(0xE9, output);
+  EXPECT_EQ(output.str(), "\xC3\xA9");
+}
+
+TEST(Unicode_codepoint_to_utf8, stream_three_byte_cjk) {
+  std::ostringstream output;
+  sourcemeta::core::codepoint_to_utf8(0x4E16, output);
+  EXPECT_EQ(output.str(), "\xE4\xB8\x96");
+}
+
+TEST(Unicode_codepoint_to_utf8, stream_four_byte_emoji) {
+  std::ostringstream output;
+  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
+  EXPECT_EQ(output.str(), "\xF0\x9F\x98\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, stream_multiple_codepoints) {
+  std::ostringstream output;
+  sourcemeta::core::codepoint_to_utf8(0x48, output);
+  sourcemeta::core::codepoint_to_utf8(0xE9, output);
+  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
+  EXPECT_EQ(output.str(), "H\xC3\xA9\xF0\x9F\x98\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_ascii_letter) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0x41, output);
+  EXPECT_EQ(output, "A");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_two_byte_latin_e_acute) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0xE9, output);
+  EXPECT_EQ(output, "\xC3\xA9");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_three_byte_cjk) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0x4E16, output);
+  EXPECT_EQ(output, "\xE4\xB8\x96");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_four_byte_emoji) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
+  EXPECT_EQ(output, "\xF0\x9F\x98\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_multiple_codepoints) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0x48, output);
+  sourcemeta::core::codepoint_to_utf8(0xE9, output);
+  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
+  EXPECT_EQ(output, "H\xC3\xA9\xF0\x9F\x98\x80");
+}
+
+TEST(Unicode_codepoint_to_utf8, string_four_byte_max) {
+  std::string output;
+  sourcemeta::core::codepoint_to_utf8(0x10FFFF, output);
+  EXPECT_EQ(output, "\xF4\x8F\xBF\xBF");
+}
diff --git a/test/unicode/is_surrogate_test.cc b/test/unicode/is_surrogate_test.cc
new file mode 100644
index 0000000000..7c0e243dcb
--- /dev/null
+++ b/test/unicode/is_surrogate_test.cc
@@ -0,0 +1,59 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+// Unicode Standard §3.8: U+D800..U+DBFF are the high (lead) surrogates and
+// U+DC00..U+DFFF are the low (trail) surrogates, so the test names below walk
+// the range in ascending codepoint order
+
+TEST(Unicode_is_surrogate, null) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0x0000));
+}
+
+TEST(Unicode_is_surrogate, ascii_letter) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0x0041));
+}
+
+TEST(Unicode_is_surrogate, just_below_surrogate_range) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0xD7FF));
+}
+
+TEST(Unicode_is_surrogate, high_surrogate_low_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xD800));
+}
+
+TEST(Unicode_is_surrogate, high_surrogate_mid) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xDA00));
+}
+
+TEST(Unicode_is_surrogate, high_surrogate_high_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xDBFF));
+}
+
+TEST(Unicode_is_surrogate, low_surrogate_low_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xDC00));
+}
+
+TEST(Unicode_is_surrogate, low_surrogate_mid) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xDE00));
+}
+
+TEST(Unicode_is_surrogate, low_surrogate_high_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_surrogate(0xDFFF));
+}
+
+TEST(Unicode_is_surrogate, just_above_surrogate_range) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0xE000));
+}
+
+TEST(Unicode_is_surrogate, max_bmp) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0xFFFF));
+}
+
+TEST(Unicode_is_surrogate, emoji_grinning_face) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0x1F600));
+}
+
+TEST(Unicode_is_surrogate, max_codepoint) {
+  EXPECT_FALSE(sourcemeta::core::is_surrogate(0x10FFFF));
+}
diff --git a/test/unicode/is_utf8_continuation_test.cc b/test/unicode/is_utf8_continuation_test.cc
new file mode 100644
index 0000000000..afc69a08a7
--- /dev/null
+++ b/test/unicode/is_utf8_continuation_test.cc
@@ -0,0 +1,51 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+TEST(Unicode_is_utf8_continuation, ascii_null_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0x00));
+}
+
+TEST(Unicode_is_utf8_continuation, ascii_letter_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0x41));
+}
+
+TEST(Unicode_is_utf8_continuation, ascii_high_boundary_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0x7F));
+}
+
+TEST(Unicode_is_utf8_continuation, low_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_utf8_continuation(0x80));
+}
+
+TEST(Unicode_is_utf8_continuation, just_above_ascii) {
+  EXPECT_TRUE(sourcemeta::core::is_utf8_continuation(0x81));
+}
+
+TEST(Unicode_is_utf8_continuation, mid_range_a0) {
+  EXPECT_TRUE(sourcemeta::core::is_utf8_continuation(0xA0));
+}
+
+TEST(Unicode_is_utf8_continuation, mid_range_b0) {
+  EXPECT_TRUE(sourcemeta::core::is_utf8_continuation(0xB0));
+}
+
+TEST(Unicode_is_utf8_continuation, high_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_utf8_continuation(0xBF));
+}
+
+TEST(Unicode_is_utf8_continuation, two_byte_lead_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0xC0));
+}
+
+TEST(Unicode_is_utf8_continuation, three_byte_lead_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0xE0));
+}
+
+TEST(Unicode_is_utf8_continuation, four_byte_lead_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0xF0));
+}
+
+TEST(Unicode_is_utf8_continuation, max_byte_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_utf8_continuation(0xFF));
+}
diff --git a/test/unicode/is_valid_codepoint_test.cc b/test/unicode/is_valid_codepoint_test.cc
new file mode 100644
index 0000000000..d47d2473c3
--- /dev/null
+++ b/test/unicode/is_valid_codepoint_test.cc
@@ -0,0 +1,71 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+// Unicode Standard §3.8: U+D800..U+DBFF are the high (lead) surrogates and
+// U+DC00..U+DFFF are the low (trail) surrogates. The tests below expect both
+// halves of that range to be rejected, along with anything above U+10FFFF
+
+TEST(Unicode_is_valid_codepoint, null) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x0000));
+}
+
+TEST(Unicode_is_valid_codepoint, ascii_letter) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x0041));
+}
+
+TEST(Unicode_is_valid_codepoint, ascii_high_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x007F));
+}
+
+TEST(Unicode_is_valid_codepoint, latin_extended) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x00E9));
+}
+
+TEST(Unicode_is_valid_codepoint, just_below_surrogate_range) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0xD7FF));
+}
+
+TEST(Unicode_is_valid_codepoint, high_surrogate_low_boundary_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0xD800));
+}
+
+TEST(Unicode_is_valid_codepoint, high_surrogate_high_boundary_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0xDBFF));
+}
+
+TEST(Unicode_is_valid_codepoint, low_surrogate_low_boundary_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0xDC00));
+}
+
+TEST(Unicode_is_valid_codepoint, low_surrogate_high_boundary_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0xDFFF));
+}
+
+TEST(Unicode_is_valid_codepoint, just_above_surrogate_range) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0xE000));
+}
+
+TEST(Unicode_is_valid_codepoint, max_bmp) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0xFFFF));
+}
+
+TEST(Unicode_is_valid_codepoint, smp_low_boundary) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x10000));
+}
+
+TEST(Unicode_is_valid_codepoint, emoji_grinning_face) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x1F600));
+}
+
+TEST(Unicode_is_valid_codepoint, max_codepoint) {
+  EXPECT_TRUE(sourcemeta::core::is_valid_codepoint(0x10FFFF));
+}
+
+TEST(Unicode_is_valid_codepoint, just_above_max_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0x110000));
+}
+
+TEST(Unicode_is_valid_codepoint, far_above_max_rejected) {
+  EXPECT_FALSE(sourcemeta::core::is_valid_codepoint(0x1FFFFF));
+}
diff --git a/test/unicode/unicode_test.cc b/test/unicode/unicode_test.cc
deleted file mode 100644
index 6650920404..0000000000
--- a/test/unicode/unicode_test.cc
+++ /dev/null
@@ -1,228 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <sourcemeta/core/unicode.h>
-
-#include <sstream> // std::istringstream, std::ostringstream
-#include <string>  // std::string, std::u32string
-
-TEST(Unicode, codepoint_to_utf8_ascii_letter) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x41), "A");
-}
-
-TEST(Unicode, codepoint_to_utf8_ascii_null) {
-  const std::string expected(1, '\0');
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x00), expected);
-}
-
-TEST(Unicode, codepoint_to_utf8_ascii_max) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x7F), "\x7F");
-}
-
-TEST(Unicode, codepoint_to_utf8_two_byte_min) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x80), "\xC2\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_two_byte_latin_e_acute) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0xE9), "\xC3\xA9");
-}
-
-TEST(Unicode, codepoint_to_utf8_two_byte_max) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x7FF), "\xDF\xBF");
-}
-
-TEST(Unicode, codepoint_to_utf8_three_byte_min) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x800), "\xE0\xA0\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_three_byte_cjk) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x4E16), "\xE4\xB8\x96");
-}
-
-TEST(Unicode, codepoint_to_utf8_three_byte_max) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0xFFFF), "\xEF\xBF\xBF");
-}
-
-TEST(Unicode, codepoint_to_utf8_four_byte_min) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x10000), "\xF0\x90\x80\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_four_byte_emoji) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x1F600), "\xF0\x9F\x98\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_four_byte_max) {
-  EXPECT_EQ(sourcemeta::core::codepoint_to_utf8(0x10FFFF), "\xF4\x8F\xBF\xBF");
-}
-
-TEST(Unicode, codepoint_to_utf8_stream_ascii_letter) {
-  std::ostringstream output;
-  sourcemeta::core::codepoint_to_utf8(0x41, output);
-  EXPECT_EQ(output.str(), "A");
-}
-
-TEST(Unicode, codepoint_to_utf8_stream_two_byte_latin_e_acute) {
-  std::ostringstream output;
-  sourcemeta::core::codepoint_to_utf8(0xE9, output);
-  EXPECT_EQ(output.str(), "\xC3\xA9");
-}
-
-TEST(Unicode, codepoint_to_utf8_stream_three_byte_cjk) {
-  std::ostringstream output;
-  sourcemeta::core::codepoint_to_utf8(0x4E16, output);
-  EXPECT_EQ(output.str(), "\xE4\xB8\x96");
-}
-
-TEST(Unicode, codepoint_to_utf8_stream_four_byte_emoji) {
-  std::ostringstream output;
-  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
-  EXPECT_EQ(output.str(), "\xF0\x9F\x98\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_stream_multiple_codepoints) {
-  std::ostringstream output;
-  sourcemeta::core::codepoint_to_utf8(0x48, output);
-  sourcemeta::core::codepoint_to_utf8(0xE9, output);
-  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
-  EXPECT_EQ(output.str(), "H\xC3\xA9\xF0\x9F\x98\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_ascii_letter) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0x41, output);
-  EXPECT_EQ(output, "A");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_two_byte_latin_e_acute) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0xE9, output);
-  EXPECT_EQ(output, "\xC3\xA9");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_three_byte_cjk) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0x4E16, output);
-  EXPECT_EQ(output, "\xE4\xB8\x96");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_four_byte_emoji) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
-  EXPECT_EQ(output, "\xF0\x9F\x98\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_multiple_codepoints) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0x48, output);
-  sourcemeta::core::codepoint_to_utf8(0xE9, output);
-  sourcemeta::core::codepoint_to_utf8(0x1F600, output);
-  EXPECT_EQ(output, "H\xC3\xA9\xF0\x9F\x98\x80");
-}
-
-TEST(Unicode, codepoint_to_utf8_string_four_byte_max) {
-  std::string output;
-  sourcemeta::core::codepoint_to_utf8(0x10FFFF, output);
-  EXPECT_EQ(output, "\xF4\x8F\xBF\xBF");
-}
-
-TEST(Unicode, utf8_to_utf32_ascii) {
-  std::istringstream input{"Hello"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x48, 0x65, 0x6C, 0x6C, 0x6F};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_empty) {
-  std::istringstream input{""};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  EXPECT_TRUE(result.value().empty());
-}
-
-TEST(Unicode, utf8_to_utf32_two_byte) {
-  std::istringstream input{"\xC3\xA9"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0xE9};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_three_byte_cjk) {
-  std::istringstream input{"\xE4\xB8\x96"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x4E16};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_four_byte_emoji) {
-  std::istringstream input{"\xF0\x9F\x98\x80"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x1F600};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_mixed) {
-  std::istringstream input{"H\xC3\xA9\xE4\xB8\x96\xF0\x9F\x98\x80"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x48, 0xE9, 0x4E16, 0x1F600};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_invalid_continuation) {
-  std::istringstream input{"\xC3\x28"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_FALSE(result.has_value());
-}
-
-TEST(Unicode, utf8_to_utf32_truncated_sequence) {
-  std::istringstream input{"\xE4\xB8"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_FALSE(result.has_value());
-}
-
-TEST(Unicode, utf8_to_utf32_overlong_encoding) {
-  std::istringstream input{"\xC0\x80"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_FALSE(result.has_value());
-}
-
-TEST(Unicode, utf8_to_utf32_surrogate_codepoint) {
-  std::istringstream input{"\xED\xA0\x80"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_FALSE(result.has_value());
-}
-
-TEST(Unicode, utf8_to_utf32_invalid_start_byte) {
-  std::istringstream input{"\xFF"};
-  const auto result{sourcemeta::core::utf8_to_utf32(input)};
-  EXPECT_FALSE(result.has_value());
-}
-
-TEST(Unicode, utf8_to_utf32_string_view_ascii) {
-  const auto result{sourcemeta::core::utf8_to_utf32("Hello")};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x48, 0x65, 0x6C, 0x6C, 0x6F};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_string_view_empty) {
-  const auto result{sourcemeta::core::utf8_to_utf32("")};
-  EXPECT_TRUE(result.has_value());
-  EXPECT_TRUE(result.value().empty());
-}
-
-TEST(Unicode, utf8_to_utf32_string_view_mixed) {
-  const auto result{
-      sourcemeta::core::utf8_to_utf32("H\xC3\xA9\xE4\xB8\x96\xF0\x9F\x98\x80")};
-  EXPECT_TRUE(result.has_value());
-  const std::u32string expected{0x48, 0xE9, 0x4E16, 0x1F600};
-  EXPECT_EQ(result.value(), expected);
-}
-
-TEST(Unicode, utf8_to_utf32_string_view_invalid) {
-  const auto result{sourcemeta::core::utf8_to_utf32("\xFF")};
-  EXPECT_FALSE(result.has_value());
-}
diff --git a/test/unicode/utf8_codepoint_byte_count_test.cc b/test/unicode/utf8_codepoint_byte_count_test.cc
new file mode 100644
index 0000000000..64e9d532f2
--- /dev/null
+++ b/test/unicode/utf8_codepoint_byte_count_test.cc
@@ -0,0 +1,63 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+TEST(Unicode_utf8_codepoint_byte_count, ascii_null) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x0000), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, ascii_letter) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x0041), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, ascii_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x007F), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, two_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x0080), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, two_byte_latin_e_acute) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x00E9), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, two_byte_greek_alpha) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x03B1), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, two_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x07FF), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, three_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x0800), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, three_byte_cjk) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x4E2D), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, three_byte_korean_si) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0xC2E4), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, three_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0xFFFF), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, four_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x10000), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, four_byte_emoji_grinning) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x1F600), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, four_byte_smp_mid) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x40000), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_byte_count, four_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_byte_count(0x10FFFF), 4u);
+}
diff --git a/test/unicode/utf8_codepoint_length_test.cc b/test/unicode/utf8_codepoint_length_test.cc
new file mode 100644
index 0000000000..561dd1cb91
--- /dev/null
+++ b/test/unicode/utf8_codepoint_length_test.cc
@@ -0,0 +1,300 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+#include <string> // std::string
+
+TEST(Unicode_utf8_codepoint_length, empty_input_returns_zero) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, position_at_size_returns_zero) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("A", 1), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, position_past_size_returns_zero) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("A", 5), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_letter_returns_one) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("A", 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_null_returns_one) {
+  const std::string null_byte(1, '\0');
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length(null_byte, 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_low_boundary_returns_one) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\x01", 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_high_boundary_returns_one) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\x7f", 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_digit_returns_one) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("0", 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_space_returns_one) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length(" ", 0), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_lead_low_boundary) {
+  // U+0080: \xC2\x80 (smallest 2-byte codepoint)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xc2\x80", 0), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_lead_high_boundary) {
+  // U+07FF: \xDF\xBF (largest 2-byte codepoint)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xdf\xbf", 0), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_greek_alpha) {
+  // U+03B1 GREEK SMALL ALPHA: \xCE\xB1
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xce\xb1", 0), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_latin_e_acute) {
+  // U+00E9 LATIN SMALL E ACUTE: \xC3\xA9
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xc3\xa9", 0), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_overlong_c0_zero) {
+  // %xC0 is forbidden (overlong encoding of U+0000)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xc0\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_overlong_c1_max) {
+  // %xC1 is forbidden (overlong encoding of U+007F)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xc1\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_invalid_tail_below_range) {
+  // UTF8-tail = %x80-BF; \x7F is below range
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xce\x7f", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_invalid_tail_above_range) {
+  // UTF8-tail = %x80-BF; \xC0 is above range
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xce\xc0", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_truncated) {
+  // Lead byte with no continuation
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xce", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e0_low_boundary) {
+  // U+0800: \xE0\xA0\x80 (smallest 3-byte codepoint)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe0\xa0\x80", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e0_high_boundary) {
+  // U+0FFF: \xE0\xBF\xBF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe0\xbf\xbf", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e0_overlong_low) {
+  // %xE0 %x80-9F is overlong (codepoints < U+0800)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe0\x80\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e0_overlong_boundary) {
+  // %xE0 %x9F is just below the valid %xA0
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe0\x9f\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e1_letter) {
+  // U+1000 MYANMAR: \xE1\x80\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe1\x80\x80", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_e4_cjk) {
+  // U+4E2D CJK 中: \xE4\xB8\xAD
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe4\xb8\xad", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ec_high_boundary) {
+  // %xEC range is fully open: \xEC\xBF\xBF (U+CFFF)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xec\xbf\xbf", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ec_korean_si) {
+  // U+C2E4 실 (HANGUL SYLLABLE SIL): \xEC\x8B\xA4
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xec\x8b\xa4", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ed_low_boundary) {
+  // U+D000: \xED\x80\x80 (smallest %xED codepoint, below the surrogates)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xed\x80\x80", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ed_high_boundary) {
+  // U+D7FF: \xED\x9F\xBF (the last codepoint before the surrogate range)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xed\x9f\xbf", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_surrogate_low) {
+  // U+D800: \xED\xA0\x80 (forbidden surrogate range)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xed\xa0\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_surrogate_high) {
+  // U+DFFF: \xED\xBF\xBF (forbidden surrogate range)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xed\xbf\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ee_low_boundary) {
+  // U+E000 (first codepoint after the surrogate range): \xEE\x80\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xee\x80\x80", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_ef_high_boundary) {
+  // U+FFFF (largest 3-byte codepoint): \xEF\xBF\xBF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xef\xbf\xbf", 0), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_truncated_one_byte) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe4", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_truncated_two_bytes) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe4\xb8", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_invalid_third_byte) {
+  // Third byte must be %x80-BF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xe4\xb8\x7f", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f0_low_boundary) {
+  // U+10000 (smallest 4-byte codepoint): \xF0\x90\x80\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x90\x80\x80", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f0_high_boundary) {
+  // U+3FFFF: \xF0\xBF\xBF\xBF (largest codepoint with the %xF0 lead byte)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\xbf\xbf\xbf", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f0_overlong_low) {
+  // %xF0 %x80-8F is overlong (codepoints < U+10000)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x80\x80\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f0_overlong_high_boundary) {
+  // %xF0 %x8F is just below the valid %x90
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x8f\xbf\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f1_low) {
+  // U+40000: \xF1\x80\x80\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf1\x80\x80\x80", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f3_high) {
+  // U+FFFFF: \xF3\xBF\xBF\xBF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf3\xbf\xbf\xbf", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_emoji_grinning) {
+  // U+1F600 😀: \xF0\x9F\x98\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x9f\x98\x80", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f4_low_boundary) {
+  // U+100000: \xF4\x80\x80\x80
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf4\x80\x80\x80", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f4_high_boundary) {
+  // U+10FFFF (last valid Unicode codepoint): \xF4\x8F\xBF\xBF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf4\x8f\xbf\xbf", 0), 4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_above_max_codepoint) {
+  // %xF4 %x90+ would encode codepoints > U+10FFFF (forbidden)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf4\x90\x80\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_above_max_high) {
+  // \xF4\xBF\xBF\xBF would encode U+13FFFF, above U+10FFFF (forbidden)
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf4\xbf\xbf\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_truncated_one_byte) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_truncated_two_bytes) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x9f", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_truncated_three_bytes) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x9f\x98", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_invalid_third) {
+  // Third byte must be %x80-BF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x9f\x7f\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_invalid_fourth) {
+  // Fourth byte must be %x80-BF
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf0\x9f\x98\xc0", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_f5_forbidden_lead) {
+  // %xF5-FF are not valid lead bytes per RFC 6532 §3.1
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xf5\x80\x80\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, lead_byte_fe_forbidden) {
+  // %xFE is not a valid lead byte
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xfe\x80\x80\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, lead_byte_ff_forbidden) {
+  // %xFF is not a valid lead byte
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xff", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, lone_continuation_low) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\x80", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, lone_continuation_high) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xbf", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, lone_continuation_middle) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("\xa0", 0), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, ascii_at_offset) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("ABC", 1), 1u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_at_offset) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("xy\xce\xb1z", 2), 2u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_at_offset) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("xy\xe4\xb8\xad", 2), 3u);
+}
+
+TEST(Unicode_utf8_codepoint_length, four_byte_at_offset) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("xy\xf0\x9f\x98\x80", 2),
+            4u);
+}
+
+TEST(Unicode_utf8_codepoint_length, two_byte_truncated_at_offset) {
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("xy\xce", 2), 0u);
+}
+
+TEST(Unicode_utf8_codepoint_length, three_byte_truncated_at_offset) {
+  // Only one byte after the lead at position 2
+  EXPECT_EQ(sourcemeta::core::utf8_codepoint_length("xy\xe4\xb8", 2), 0u);
+}
diff --git a/test/unicode/utf8_lead_byte_size_test.cc b/test/unicode/utf8_lead_byte_size_test.cc
new file mode 100644
index 0000000000..d7b3b7a3d5
--- /dev/null
+++ b/test/unicode/utf8_lead_byte_size_test.cc
@@ -0,0 +1,87 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+TEST(Unicode_utf8_lead_byte_size, ascii_null) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0x00), 1u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, ascii_letter) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0x41), 1u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, ascii_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0x7F), 1u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, continuation_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0x80), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, continuation_mid) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xA0), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, continuation_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xBF), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, overlong_c0) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xC0), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, overlong_c1) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xC1), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, two_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xC2), 2u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, two_byte_mid) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xCE), 2u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, two_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xDF), 2u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, three_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xE0), 3u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, three_byte_mid_e4) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xE4), 3u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, three_byte_ed) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xED), 3u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, three_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xEF), 3u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, four_byte_low_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xF0), 4u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, four_byte_mid_f2) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xF2), 4u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, four_byte_high_boundary) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xF4), 4u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, above_range_f5) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xF5), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, above_range_fe) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xFE), 0u);
+}
+
+TEST(Unicode_utf8_lead_byte_size, above_range_ff) {
+  EXPECT_EQ(sourcemeta::core::utf8_lead_byte_size(0xFF), 0u);
+}
diff --git a/test/unicode/utf8_to_utf32_test.cc b/test/unicode/utf8_to_utf32_test.cc
new file mode 100644
index 0000000000..907c6d012c
--- /dev/null
+++ b/test/unicode/utf8_to_utf32_test.cc
@@ -0,0 +1,109 @@
+#include <gtest/gtest.h>
+
+#include <sourcemeta/core/unicode.h>
+
+#include <sstream> // std::istringstream
+#include <string>  // std::u32string
+
+TEST(Unicode_utf8_to_utf32, ascii) {
+  std::istringstream input{"Hello"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x48, 0x65, 0x6C, 0x6C, 0x6F};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, empty) {
+  std::istringstream input{""};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_TRUE(result.value().empty());
+}
+
+TEST(Unicode_utf8_to_utf32, two_byte) {
+  std::istringstream input{"\xC3\xA9"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0xE9};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, three_byte_cjk) {
+  std::istringstream input{"\xE4\xB8\x96"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x4E16};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, four_byte_emoji) {
+  std::istringstream input{"\xF0\x9F\x98\x80"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x1F600};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, mixed) {
+  std::istringstream input{"H\xC3\xA9\xE4\xB8\x96\xF0\x9F\x98\x80"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x48, 0xE9, 0x4E16, 0x1F600};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, invalid_continuation) {
+  std::istringstream input{"\xC3\x28"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST(Unicode_utf8_to_utf32, truncated_sequence) {
+  std::istringstream input{"\xE4\xB8"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST(Unicode_utf8_to_utf32, overlong_encoding) {
+  std::istringstream input{"\xC0\x80"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST(Unicode_utf8_to_utf32, surrogate_codepoint) {
+  std::istringstream input{"\xED\xA0\x80"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST(Unicode_utf8_to_utf32, invalid_start_byte) {
+  std::istringstream input{"\xFF"};
+  const auto result{sourcemeta::core::utf8_to_utf32(input)};
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST(Unicode_utf8_to_utf32, string_view_ascii) {
+  const auto result{sourcemeta::core::utf8_to_utf32("Hello")};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x48, 0x65, 0x6C, 0x6C, 0x6F};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, string_view_empty) {
+  const auto result{sourcemeta::core::utf8_to_utf32("")};
+  EXPECT_TRUE(result.has_value());
+  EXPECT_TRUE(result.value().empty());
+}
+
+TEST(Unicode_utf8_to_utf32, string_view_mixed) {
+  const auto result{
+      sourcemeta::core::utf8_to_utf32("H\xC3\xA9\xE4\xB8\x96\xF0\x9F\x98\x80")};
+  EXPECT_TRUE(result.has_value());
+  const std::u32string expected{0x48, 0xE9, 0x4E16, 0x1F600};
+  EXPECT_EQ(result.value(), expected);
+}
+
+TEST(Unicode_utf8_to_utf32, string_view_invalid) {
+  const auto result{sourcemeta::core::utf8_to_utf32("\xFF")};
+  EXPECT_FALSE(result.has_value());
+}