Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
elseif(component STREQUAL "email")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_dns.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_email.cmake")
elseif(component STREQUAL "uri")
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake")
Expand Down
4 changes: 3 additions & 1 deletion src/core/email/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME email
SOURCES email.cc)
SOURCES email.cc helpers.h)

if(SOURCEMETA_CORE_INSTALL)
sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME email)
Expand All @@ -9,3 +9,5 @@ target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::dns)
target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::ip)
target_link_libraries(sourcemeta_core_email
PRIVATE sourcemeta::core::unicode)
187 changes: 57 additions & 130 deletions src/core/email/email.cc
Original file line number Diff line number Diff line change
@@ -1,110 +1,15 @@
#include <sourcemeta/core/email.h>

#include <sourcemeta/core/dns.h>
#include <sourcemeta/core/ip.h>

namespace sourcemeta::core {

// RFC 5321 §4.1.2 `atext`: reports whether `character` is an ASCII letter,
// an ASCII digit, or one of the punctuation marks
//   ! # $ % & ' * + - / = ? ^ _ ` { | } ~
// Any other octet (including NUL and bytes outside ASCII) is rejected.
static constexpr auto is_atext(const char character) -> bool {
  const bool is_upper{character >= 'A' && character <= 'Z'};
  const bool is_lower{character >= 'a' && character <= 'z'};
  const bool is_digit{character >= '0' && character <= '9'};
  if (is_upper || is_lower || is_digit) {
    return true;
  }

  // The remaining members of the set are the punctuation marks listed in
  // the grammar; look the candidate up in that fixed list
  constexpr std::string_view punctuation{"!#$%&'*+-/=?^_`{|}~"};
  return punctuation.find(character) != std::string_view::npos;
}

// RFC 5321 §4.1.2: qtextSMTP = %d32-33 / %d35-91 / %d93-126
// Equivalently: any octet in the range 32..126 other than the double
// quote (34) and the backslash (92).
static constexpr auto is_qtext_smtp(const unsigned char character) -> bool {
  if (character < 32 || character > 126) {
    return false;
  }

  return character != 34 && character != 92;
}

// RFC 5321 §4.1.2: Let-dig = ALPHA / DIGIT
// True only for the ASCII letters A-Z, a-z and the ASCII digits 0-9.
static constexpr auto is_let_dig(const char character) -> bool {
  if (character >= '0' && character <= '9') {
    return true;
  }

  if (character >= 'a' && character <= 'z') {
    return true;
  }

  return character >= 'A' && character <= 'Z';
}

// RFC 5321 §4.1.3: dcontent = %d33-90 / %d94-126
// Equivalently: any octet in the range 33..126 other than "[" (91),
// "\" (92) and "]" (93).
static constexpr auto is_dcontent(const unsigned char character) -> bool {
  const bool within_visible_range{character >= 33 && character <= 126};
  const bool is_excluded{character >= 91 && character <= 93};
  return within_visible_range && !is_excluded;
}

// RFC 5321 §4.1.2: Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
// RFC 5321 §4.1.3: Standardized-tag = Ldh-str
// Accepts a non-empty string whose final character is a Let-dig and whose
// preceding characters are each either a Let-dig or a hyphen.
static constexpr auto is_ldh_str(const std::string_view value) -> bool {
  if (value.empty()) {
    return false;
  }

  // Every character but the last may also be a hyphen
  const auto last_index{value.size() - 1};
  for (std::string_view::size_type index{0}; index < last_index; ++index) {
    const auto current{value[index]};
    if (current == '-') {
      continue;
    }
    if (!is_let_dig(current)) {
      return false;
    }
  }

  // The trailing character must be a letter or a digit, never a hyphen
  return is_let_dig(value[last_index]);
}

// RFC 5234 §2.3: ABNF literal strings are case-insensitive by default
// RFC 5321 §4.1.3: IPv6-address-literal prefix is the literal "IPv6:"
// Reports whether `value` begins with that five-character tag, accepting
// either letter case for "I", "P" and "v". Anything after the tag is not
// inspected here.
static constexpr auto matches_ipv6_tag(const std::string_view value) -> bool {
  constexpr std::string_view tag_upper{"IPV6:"};
  constexpr std::string_view tag_lower{"ipv6:"};
  if (value.size() < tag_upper.size()) {
    return false;
  }

  // Compare position by position against both spellings of the tag; the
  // digit and the colon are identical in the two tables
  for (std::string_view::size_type index{0}; index < tag_upper.size();
       ++index) {
    if (value[index] != tag_upper[index] && value[index] != tag_lower[index]) {
      return false;
    }
  }

  return true;
}
#include "helpers.h"

// RFC 5321 §4.1.3: General-address-literal = Standardized-tag ":" 1*dcontent
static constexpr auto is_general_address_literal(const std::string_view value)
-> bool {
const auto colon_position{value.find(':')};
if (colon_position == std::string_view::npos) {
return false;
}
if (!is_ldh_str(value.substr(0, colon_position))) {
return false;
}
const auto content{value.substr(colon_position + 1)};
if (content.empty()) {
return false;
}
for (const auto character : content) {
if (!is_dcontent(static_cast<unsigned char>(character))) {
return false;
}
}
return true;
}
namespace sourcemeta::core {

auto is_email(const std::string_view value) -> bool {
// RFC 5321 §4.1.2 Mailbox grammar. When AllowUtf8 is true, RFC 6531 §3.3
// extends atext, qtextSMTP, and sub-domain with UTF8-non-ascii alternatives
template <bool AllowUtf8>
static auto is_mailbox(const std::string_view value) -> bool {
if (value.empty()) {
return false;
}
Expand All @@ -126,11 +31,23 @@ auto is_email(const std::string_view value) -> bool {
return false;
}
position += 1;
} else {
if (!is_qtext_smtp(static_cast<unsigned char>(value[position]))) {
continue;
}

if (is_qtext_smtp(static_cast<unsigned char>(value[position]))) {
position += 1;
continue;
}

if constexpr (AllowUtf8) {
// RFC 6531 §3.3: qtextSMTP =/ UTF8-non-ascii
const auto utf8_length{utf8_codepoint_length(value, position)};
if (utf8_length < 2) {
return false;
}
position += 1;
position += utf8_length;
} else {
return false;
}
}
if (position >= value.size()) {
Expand All @@ -150,13 +67,29 @@ auto is_email(const std::string_view value) -> bool {
}
previous_was_dot = true;
atom_started = false;
} else if (is_atext(character)) {
position += 1;
continue;
}

if (is_atext(character)) {
previous_was_dot = false;
atom_started = true;
position += 1;
continue;
}

if constexpr (AllowUtf8) {
// RFC 6531 §3.3: atext =/ UTF8-non-ascii
const auto utf8_length{utf8_codepoint_length(value, position)};
if (utf8_length < 2) {
return false;
}
previous_was_dot = false;
atom_started = true;
position += utf8_length;
} else {
return false;
}
position += 1;
}
if (position == 0 || previous_was_dot) {
return false;
Expand All @@ -177,32 +110,26 @@ auto is_email(const std::string_view value) -> bool {

// RFC 5321 §4.1.3: address-literal = "[" ( IPv4 / IPv6 / General ) "]"
if (!domain.empty() && domain.front() == '[') {
if (domain.back() != ']') {
return false;
}
// RFC 5321 §4.5.3.1.2: 255-octet cap on a domain "name or number"
if (domain.size() > 255) {
return false;
}
const auto inner{domain.substr(1, domain.size() - 2)};
// RFC 5321 §4.1.3: IPv6-address-literal = "IPv6:" IPv6-addr
if (matches_ipv6_tag(inner) && is_ipv6(inner.substr(5))) {
return true;
}
// RFC 5234 §3.2: ABNF alternatives are unordered. A failed IPv6 match
// falls through to IPv4 or General-address-literal.
// RFC 5321 §4.1.3: IPv4-address-literal = Snum 3("." Snum) has no ":",
// General-address-literal requires ":"
if (inner.find(':') == std::string_view::npos) {
return is_ipv4(inner);
}
return is_general_address_literal(inner);
return is_address_literal(domain);
}

// RFC 5321 §4.1.2 Domain matches is_hostname (RFC 1123 §2.1) by
// grammar, by 63-octet label cap (RFC 1035 §2.3.4), and by
// 255-octet total cap (RFC 5321 §4.5.3.1.2)
return is_hostname(domain);
if constexpr (AllowUtf8) {
// RFC 6531 §3.3: sub-domain =/ U-label
return is_idn_domain(domain);
} else {
// RFC 5321 §4.1.2 Domain matches is_hostname (RFC 1123 §2.1) by
// grammar, by 63-octet label cap (RFC 1035 §2.3.4), and by
// 255-octet total cap (RFC 5321 §4.5.3.1.2)
return is_hostname(domain);
}
}

// Validates `value` through the shared mailbox checker with the UTF-8
// extensions switched off.
auto is_email(const std::string_view value) -> bool {
  constexpr bool allow_utf8{false};
  return is_mailbox<allow_utf8>(value);
}

// Validates `value` through the shared mailbox checker with the UTF-8
// extensions switched on.
auto is_idn_email(const std::string_view value) -> bool {
  constexpr bool allow_utf8{true};
  return is_mailbox<allow_utf8>(value);
}

} // namespace sourcemeta::core
Loading
Loading