Skip to content

Commit c685037

Browse files
committed
Added EncodingPolicy enum (ALL_RESERVED, RFC_3986) and switched query and fragment encoding to PercentCodec.FRAGMENT under RFC_3986
1 parent 41ba051 commit c685037

File tree

3 files changed

+59
-18
lines changed

3 files changed

+59
-18
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,16 +84,6 @@ public class PercentCodec {
8484
URIC.or(UNRESERVED);
8585
}
8686

87-
static final BitSet FRAGMENT_SAFE = new BitSet(256);
88-
static {
89-
FRAGMENT_SAFE.or(UNRESERVED);
90-
FRAGMENT_SAFE.or(SUB_DELIMS);
91-
FRAGMENT_SAFE.set(':');
92-
FRAGMENT_SAFE.set('@');
93-
FRAGMENT_SAFE.set('/');
94-
FRAGMENT_SAFE.set('?');
95-
}
96-
9787
static final BitSet RFC5987_UNRESERVED = new BitSet(256);
9888

9989
static {

httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,36 @@ public static URIBuilder loopbackAddress() {
8787
private Charset charset;
8888
private String fragment;
8989
private String encodedFragment;
90+
private EncodingPolicy encodingPolicy = EncodingPolicy.ALL_RESERVED;
9091

9192
private boolean plusAsBlank;
9293

94+
/**
95+
* Defines the encoding policy for URI components in {@link URIBuilder}.
96+
* This enum controls how characters are percent-encoded when constructing a URI,
97+
* allowing flexibility between strict encoding and RFC 3986-compliant behavior.
98+
*
99+
* @since 5.4
100+
*/
101+
public enum EncodingPolicy {
102+
/**
103+
* Encodes all reserved characters, allowing only unreserved characters
104+
* (ALPHA, DIGIT, "-", ".", "_", "~") to remain unencoded. This is a strict
105+
* policy suitable for conservative URI production where maximum encoding
106+
* is desired. Note that a custom query is encoded with {@code PercentCodec.URIC} under this policy.
107+
*/
108+
ALL_RESERVED,
109+
110+
/**
111+
* Follows RFC 3986 component-specific encoding rules. For example, query and
112+
* fragment components allow unreserved characters, sub-delimiters ("!", "$",
113+
* "&", "'", "(", ")", "*", "+", ",", ";", "="), and additional characters
114+
* (":", "@", "/", "?") to remain unencoded, as defined by {@code PercentCodec.QUERY} and {@code PercentCodec.FRAGMENT}.
115+
* This policy ensures compliance with RFC 3986 while maintaining interoperability.
116+
*/
117+
RFC_3986
118+
}
119+
93120
/**
94121
* Constructs an empty instance.
95122
*/
@@ -175,6 +202,22 @@ public URIBuilder setCharset(final Charset charset) {
175202
return this;
176203
}
177204

205+
/**
206+
* Sets the encoding policy for this {@link URIBuilder}.
207+
* The encoding policy determines how URI components (e.g., query, fragment) are
208+
* percent-encoded when building the URI string. If not set, the default policy
209+
* is {@link EncodingPolicy#ALL_RESERVED}.
210+
*
211+
* @param encodingPolicy the encoding policy to apply; {@code null} is stored as-is (it is not
212+
* reset to the default) and is then encoded like {@link EncodingPolicy#RFC_3986}
213+
* @return this {@link URIBuilder} instance for method chaining
214+
* @since 5.4
215+
*/
216+
public URIBuilder setEncodingPolicy(final EncodingPolicy encodingPolicy) {
217+
this.encodingPolicy = encodingPolicy;
218+
return this;
219+
}
220+
178221
/**
179222
* Gets the authority.
180223
*
@@ -356,18 +399,22 @@ private String buildString() {
356399
} else if (this.userInfo != null) {
357400
final int idx = this.userInfo.indexOf(':');
358401
if (idx != -1) {
359-
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset, PercentCodec.USERINFO, false);
402+
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset,
403+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
360404
sb.append(':');
361-
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset, PercentCodec.USERINFO, false);
405+
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset,
406+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
362407
} else {
363-
PercentCodec.encode(sb, this.userInfo, this.charset, PercentCodec.USERINFO, false);
408+
PercentCodec.encode(sb, this.userInfo, this.charset,
409+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
364410
}
365411
sb.append("@");
366412
}
367413
if (InetAddressUtils.isIPv6(this.host)) {
368414
sb.append("[").append(this.host).append("]");
369415
} else {
370-
PercentCodec.encode(sb, this.host, this.charset, PercentCodec.REG_NAME, false);
416+
PercentCodec.encode(sb, this.host, this.charset,
417+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false);
371418
}
372419
if (this.port >= 0) {
373420
sb.append(":").append(this.port);
@@ -391,14 +438,16 @@ private String buildString() {
391438
formatQuery(sb, this.queryParams, this.charset, false);
392439
} else if (this.query != null) {
393440
sb.append("?");
394-
PercentCodec.encode(sb, this.query, this.charset, PercentCodec.QUERY, false);
441+
PercentCodec.encode(sb, this.query, this.charset,
442+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.QUERY, false);
395443
}
396444
}
397445
if (this.encodedFragment != null) {
398446
sb.append("#").append(this.encodedFragment);
399447
} else if (this.fragment != null) {
400448
sb.append("#");
401-
PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.FRAGMENT, false);
449+
PercentCodec.encode(sb, this.fragment, this.charset,
450+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.FRAGMENT, false);
402451
}
403452
return sb.toString();
404453
}

httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ void testFormatQuery() {
171171
@Test
172172
void testHierarchicalUri() throws Exception {
173173
final URI uri = new URI("http", "stuff", "localhost", 80, "/some stuff", "param=stuff", "fragment");
174-
final URIBuilder uribuilder = new URIBuilder(uri);
174+
final URIBuilder uribuilder = new URIBuilder(uri).setEncodingPolicy(URIBuilder.EncodingPolicy.ALL_RESERVED);
175175
final URI result = uribuilder.build();
176176
Assertions.assertEquals(new URI("http://stuff@localhost:80/some%20stuff?param=stuff#fragment"), result);
177177
}
@@ -606,7 +606,7 @@ void testHttpHost() throws Exception {
606606

607607
@Test
608608
void testSetHostWithReservedChars() throws Exception {
609-
final URIBuilder uribuilder = new URIBuilder();
609+
final URIBuilder uribuilder = new URIBuilder().setEncodingPolicy(URIBuilder.EncodingPolicy.ALL_RESERVED);
610610
uribuilder.setScheme("http").setHost("!example!.com");
611611
Assertions.assertEquals(URI.create("http://%21example%21.com"), uribuilder.build());
612612
}
@@ -996,6 +996,7 @@ void testFragmentEncoding() throws Exception {
996996
.setScheme("http")
997997
.setHost("example.com")
998998
.setFragment(fragment)
999+
.setEncodingPolicy(URIBuilder.EncodingPolicy.RFC_3986)
9991000
.build();
10001001

10011002
Assertions.assertEquals(expectedEncodedFragment, uri.getRawFragment());
@@ -1010,6 +1011,7 @@ void testCustomQueryEncoding() throws Exception {
10101011
.setScheme("http")
10111012
.setHost("example.com")
10121013
.setCustomQuery(query)
1014+
.setEncodingPolicy(URIBuilder.EncodingPolicy.RFC_3986)
10131015
.build();
10141016

10151017
Assertions.assertEquals(expectedEncodedQuery, uri.getRawQuery());

0 commit comments

Comments
 (0)