diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java index 40ebc9cb81..bdf0ec9c37 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java @@ -113,6 +113,32 @@ public class PercentCodec { RFC5987_UNRESERVED.set('~'); } + static final BitSet PCHAR = new BitSet(256); + static final BitSet USERINFO = new BitSet(256); + static final BitSet REG_NAME = new BitSet(256); + static final BitSet PATH_SEGMENT = new BitSet(256); + static final BitSet QUERY = new BitSet(256); + static final BitSet FRAGMENT = new BitSet(256); + + static { + PCHAR.or(UNRESERVED); + PCHAR.or(SUB_DELIMS); + PCHAR.set(':'); + PCHAR.set('@'); + USERINFO.or(UNRESERVED); + USERINFO.or(SUB_DELIMS); + USERINFO.set(':'); + REG_NAME.or(UNRESERVED); + REG_NAME.or(SUB_DELIMS); + PATH_SEGMENT.or(PCHAR); + QUERY.or(PCHAR); + QUERY.set('/'); + QUERY.set('?'); + FRAGMENT.or(PCHAR); + FRAGMENT.set('/'); + FRAGMENT.set('?'); + } + private static final int RADIX = 16; static void encode(final StringBuilder buf, final CharSequence content, final Charset charset, diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java index 7151d7bd97..3925c6ff76 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java @@ -34,6 +34,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -87,9 +88,36 @@ public static URIBuilder loopbackAddress() { private Charset charset; private String fragment; private String encodedFragment; + private EncodingPolicy encodingPolicy = EncodingPolicy.ALL_RESERVED; private boolean plusAsBlank; 
+ /** + * Defines the encoding policy for URI components in {@link URIBuilder}. + * This enum controls how characters are percent-encoded when constructing a URI, + * allowing flexibility between strict encoding and RFC 3986-compliant behavior. + * + * @since 5.4 + */ + public enum EncodingPolicy { + /** + * Encodes all reserved characters, allowing only unreserved characters + * (ALPHA, DIGIT, "-", ".", "_", "~") to remain unencoded. This is a strict + * policy suitable for conservative URI production where maximum encoding + * is desired. + */ + ALL_RESERVED, + + /** + * Follows RFC 3986 component-specific encoding rules. For example, query and + * fragment components allow unreserved characters, sub-delimiters ("!", "$", + * "&", "'", "(", ")", "*", "+", ",", ";", "="), and additional characters + * (":", "@", "/", "?") to remain unencoded, as defined by {@code PercentCodec.QUERY} and {@code PercentCodec.FRAGMENT}. + * This policy ensures compliance with RFC 3986 while maintaining interoperability. + */ + RFC_3986 + } + /** * Constructs an empty instance. */ @@ -175,6 +203,22 @@ public URIBuilder setCharset(final Charset charset) { return this; } + /** + * Sets the encoding policy for this {@link URIBuilder}. + * The encoding policy determines how URI components (e.g., query, fragment) are + * percent-encoded when building the URI string. If not set, the default policy + * is {@link EncodingPolicy#ALL_RESERVED}. + * + * @param encodingPolicy the encoding policy to apply, or {@code null} to reset + * to the default ({@link EncodingPolicy#ALL_RESERVED}) + * @return this {@link URIBuilder} instance for method chaining + * @since 5.4 + */ + public URIBuilder setEncodingPolicy(final EncodingPolicy encodingPolicy) { + this.encodingPolicy = encodingPolicy != null ? encodingPolicy : EncodingPolicy.ALL_RESERVED; + return this; + } + /** * Gets the authority. 
* @@ -300,33 +344,46 @@ static List parsePath(final CharSequence s, final Charset charset) { return list; } - static void formatPath(final StringBuilder buf, final Iterable segments, final boolean rootless, final Charset charset) { + static void formatPath(final StringBuilder buf, final Iterable segments, final boolean rootless, + final Charset charset, final BitSet safechars) { int i = 0; for (final String segment : segments) { if (i > 0 || !rootless) { buf.append(PATH_SEPARATOR); } - PercentCodec.encode(buf, segment, charset); + PercentCodec.encode(buf, segment, charset, safechars, false); i++; } } - static void formatQuery(final StringBuilder buf, final Iterable params, final Charset charset, - final boolean blankAsPlus) { + static void formatPath(final StringBuilder buf, final Iterable segments, final boolean rootless, + final Charset charset) { + formatPath(buf, segments, rootless, charset, PercentCodec.UNRESERVED); + } + + + static void formatQuery(final StringBuilder buf, final Iterable params, + final Charset charset, final BitSet safechars, final boolean blankAsPlus) { int i = 0; for (final NameValuePair parameter : params) { if (i > 0) { buf.append(QUERY_PARAM_SEPARATOR); } - PercentCodec.encode(buf, parameter.getName(), charset, blankAsPlus); + PercentCodec.encode(buf, parameter.getName(), charset, safechars, blankAsPlus); if (parameter.getValue() != null) { buf.append(PARAM_VALUE_SEPARATOR); - PercentCodec.encode(buf, parameter.getValue(), charset, blankAsPlus); + PercentCodec.encode(buf, parameter.getValue(), charset, safechars, blankAsPlus); } i++; } } + static void formatQuery(final StringBuilder buf, final Iterable params, + final Charset charset, final boolean blankAsPlus) { + formatQuery(buf, params, charset, PercentCodec.UNRESERVED, blankAsPlus); + } + + /** * Builds a {@link URI} instance. 
*/ @@ -356,18 +413,22 @@ private String buildString() { } else if (this.userInfo != null) { final int idx = this.userInfo.indexOf(':'); if (idx != -1) { - PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset); + PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false); sb.append(':'); - PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset); + PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false); } else { - PercentCodec.encode(sb, this.userInfo, this.charset); + PercentCodec.encode(sb, this.userInfo, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false); } sb.append("@"); } if (InetAddressUtils.isIPv6(this.host)) { sb.append("[").append(this.host).append("]"); } else { - PercentCodec.encode(sb, this.host, this.charset); + PercentCodec.encode(sb, this.host, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false); } if (this.port >= 0) { sb.append(":").append(this.port); @@ -382,23 +443,27 @@ private String buildString() { } sb.append(this.encodedPath); } else if (this.pathSegments != null) { - formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset); + formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? 
PercentCodec.UNRESERVED : PercentCodec.PATH_SEGMENT); } if (this.encodedQuery != null) { sb.append("?").append(this.encodedQuery); } else if (this.queryParams != null && !this.queryParams.isEmpty()) { sb.append("?"); - formatQuery(sb, this.queryParams, this.charset, false); + formatQuery(sb, this.queryParams, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.QUERY, false); } else if (this.query != null) { sb.append("?"); - PercentCodec.encode(sb, this.query, this.charset, PercentCodec.URIC, false); + PercentCodec.encode(sb, this.query, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.QUERY, false); } } if (this.encodedFragment != null) { sb.append("#").append(this.encodedFragment); } else if (this.fragment != null) { sb.append("#"); - PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.URIC, false); + PercentCodec.encode(sb, this.fragment, this.charset, + encodingPolicy == EncodingPolicy.ALL_RESERVED ? 
PercentCodec.URIC : PercentCodec.FRAGMENT, false); } return sb.toString(); } diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java b/httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java index 98f54f390e..74e7a6016c 100644 --- a/httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java +++ b/httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java @@ -171,7 +171,7 @@ void testFormatQuery() { @Test void testHierarchicalUri() throws Exception { final URI uri = new URI("http", "stuff", "localhost", 80, "/some stuff", "param=stuff", "fragment"); - final URIBuilder uribuilder = new URIBuilder(uri); + final URIBuilder uribuilder = new URIBuilder(uri).setEncodingPolicy(URIBuilder.EncodingPolicy.ALL_RESERVED); final URI result = uribuilder.build(); Assertions.assertEquals(new URI("http://stuff@localhost:80/some%20stuff?param=stuff#fragment"), result); } @@ -997,4 +997,20 @@ void testSetPlusAsBlank() throws Exception { params = uriBuilder.getQueryParams(); Assertions.assertEquals("hello world", params.get(0).getValue()); } + + @Test + void testCustomQueryEncoding() throws Exception { + final String query = "query param:!@/?\""; + final String expectedEncodedQuery = "query%20param:!@/?%22"; + + final URI uri = new URIBuilder() + .setScheme("http") + .setHost("example.com") + .setCustomQuery(query) + .setEncodingPolicy(URIBuilder.EncodingPolicy.RFC_3986) + .build(); + + Assertions.assertEquals(expectedEncodedQuery, uri.getRawQuery()); + } + }