diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
index bdf0ec9c3..3e1700002 100644
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
@@ -33,6 +33,8 @@
 import java.nio.charset.StandardCharsets;
 import java.util.BitSet;
 
+import org.apache.hc.core5.annotation.Internal;
+
 /**
  * Percent-encoding.
  *
@@ -113,6 +115,25 @@ public class PercentCodec {
         RFC5987_UNRESERVED.set('~');
     }
 
+    /**
+     * Characters that {@code HTTP_TOKEN} leaves unencoded: every HTTP {@code tchar}
+     * except {@code '%'}, which is itself percent-encoded in the RFC 7639
+     * canonical form.
+     */
+    static final BitSet HTTP_TOKEN_UNRESERVED = new BitSet(256);
+
+    static {
+        // BitSet#set(from, to) treats the upper bound as exclusive, hence the "+ 1".
+        HTTP_TOKEN_UNRESERVED.set('a', 'z' + 1);
+        HTTP_TOKEN_UNRESERVED.set('A', 'Z' + 1);
+        HTTP_TOKEN_UNRESERVED.set('0', '9' + 1);
+
+        // Punctuation permitted in a token; '%' is deliberately absent.
+        for (final char symbol : "!#$&'*+-.^_`|~".toCharArray()) {
+            HTTP_TOKEN_UNRESERVED.set(symbol);
+        }
+    }
+
     static final BitSet PCHAR = new BitSet(256);
     static final BitSet USERINFO = new BitSet(256);
     static final BitSet REG_NAME = new BitSet(256);
@@ -217,10 +238,22 @@ public static String decode(final CharSequence content, final Charset charset) {
 
     public static final PercentCodec RFC3986 = new PercentCodec(UNRESERVED);
     public static final PercentCodec RFC5987 = new PercentCodec(RFC5987_UNRESERVED);
+    /**
+     * Codec for HTTP tokens: token characters other than {@code '%'} are kept
+     * as-is, while characters outside the token alphabet are percent-encoded.
+     */
+    public static final PercentCodec HTTP_TOKEN = new PercentCodec(HTTP_TOKEN_UNRESERVED);
 
     private final BitSet unreserved;
 
-    private PercentCodec(final BitSet unreserved) {
+    /**
+     * Creates a codec with a custom set of characters exempt from encoding.
+     *
+     * @param unreserved the characters to leave unencoded; the bit set is
+     *                   retained by reference rather than copied.
+     */
+    @Internal
+    public PercentCodec(final BitSet unreserved) {
         this.unreserved = unreserved;
     }
 
diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/TestPercentCodec.java b/httpcore5/src/test/java/org/apache/hc/core5/net/TestPercentCodec.java
index b72e1583d..1ef4ddde8 100644
--- a/httpcore5/src/test/java/org/apache/hc/core5/net/TestPercentCodec.java
+++ b/httpcore5/src/test/java/org/apache/hc/core5/net/TestPercentCodec.java
@@ -98,4 +98,39 @@ void verifyRfc5987EncodingandDecoding() {
         assertThat(PercentCodec.RFC5987.decode(PercentCodec.RFC5987.encode(s)), CoreMatchers.equalTo(s));
     }
 
+    @Test
+    void testRfc7639CanonicalAlpnTokenEncoding() {
+        // Canonical form demanded by RFC 7639 for an ALPN protocol-id (a token):
+        //  - octets outside the token alphabet MUST be percent-encoded (RFC 3986);
+        //  - '%' MUST always be percent-encoded;
+        //  - every other valid token character MUST NOT be percent-encoded;
+        //  - hex digits in escapes MUST be uppercase.
+        final PercentCodec codec = PercentCodec.HTTP_TOKEN;
+        final String tokenSymbols = "!#$&'*+-.^_`|~";
+
+        assertEquals("h2", codec.encode("h2"));
+        assertEquals("http%2F1.1", codec.encode("http/1.1"));
+        assertEquals("%25", codec.encode("%"));
+        assertEquals("foo+bar", codec.encode("foo+bar"));
+        assertEquals(tokenSymbols, codec.encode(tokenSymbols));
+
+        assertEquals("foo bar", codec.decode("foo%20bar"));
+        assertEquals("ws/é", codec.decode("ws%2F%C3%A9"));
+    }
+
+    @Test
+    void testPercentCodecEncodeIsNotRfc7639Canonical() {
+        // The static PercentCodec.encode(..) treats only the RFC 3986 UNRESERVED set as safe,
+        // so it escapes valid RFC 7230 tchars such as '+', '*', '!' and '|', which RFC 7639 forbids.
+        final String[][] expectations = {
+            {"foo+bar", "foo%2Bbar"},
+            {"*", "%2A"},
+            {"!", "%21"},
+            {"|", "%7C"},
+        };
+        for (final String[] expectation : expectations) {
+            assertEquals(expectation[1], PercentCodec.encode(expectation[0], StandardCharsets.UTF_8));
+        }
+    }
+
 }