Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,32 @@ public class PercentCodec {
RFC5987_UNRESERVED.set('~');
}

static final BitSet PCHAR = new BitSet(256);
static final BitSet USERINFO = new BitSet(256);
static final BitSet REG_NAME = new BitSet(256);
static final BitSet PATH_SEGMENT = new BitSet(256);
static final BitSet QUERY = new BitSet(256);
static final BitSet FRAGMENT = new BitSet(256);

static {
PCHAR.or(UNRESERVED);
PCHAR.or(SUB_DELIMS);
PCHAR.set(':');
PCHAR.set('@');
USERINFO.or(UNRESERVED);
USERINFO.or(SUB_DELIMS);
USERINFO.set(':');
REG_NAME.or(UNRESERVED);
REG_NAME.or(SUB_DELIMS);
PATH_SEGMENT.or(PCHAR);
QUERY.or(PCHAR);
QUERY.set('/');
QUERY.set('?');
FRAGMENT.or(PCHAR);
FRAGMENT.set('/');
FRAGMENT.set('?');
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg I cannot find anything in the RFC stating that. What am I missing? What a super strict mode we have ALL_RESERVED

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg I cannot find anything in the RFC stating that. What am I missing? What a super strict mode we have ALL_RESERVED

@olegk The test expects (, ), & to remain percent-encoded in path segments (e.g., blah-%28%20-blah-%20%26%20-blah-%20%29-blah/), but without the PATH_SEGMENT.clear('('), clear(')'), clear('&') in PercentCodec.java, it outputs blah-(%20-blah-%20&%20-blah-%20)-blah/. Reintroducing the clear() calls fixes it, but I’m wondering if this aligns with our intended behavior or if we should adjust formatPath to handle encoding differently based on encodingPolicy.
I noticed the older formatPath used PercentCodec.encode(buf, segment, charset, false) with UNRESERVED, which worked, but the new version with encodingPolicy support might need tweaking. Could you take a look and advise?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arturobernalg You have overlooked static formatQuery and formatPath methods. They should also take the encoding policy into account. Apply this patch and everything should work

===================================================================
diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java	(revision 53477ecc93d331f8348a6748a918d86b25666471)
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java	(date 1742025384945)
@@ -137,10 +137,6 @@
         FRAGMENT.or(PCHAR);
         FRAGMENT.set('/');
         FRAGMENT.set('?');
-        // Some sub-delims remain encoded (RFC 3986 allows them unencoded, but we choose to be strict).
-        PATH_SEGMENT.clear('(');
-        PATH_SEGMENT.clear(')');
-        PATH_SEGMENT.clear('&');
     }
 
     private static final int RADIX = 16;
Index: httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
--- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java	(revision 53477ecc93d331f8348a6748a918d86b25666471)
+++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java	(date 1742026397263)
@@ -34,6 +34,7 @@
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
@@ -343,19 +344,25 @@
         return list;
     }
 
-    static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless, final Charset charset) {
+    static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
+                           final Charset charset, final BitSet safechars) {
         int i = 0;
         for (final String segment : segments) {
             if (i > 0 || !rootless) {
                 buf.append(PATH_SEPARATOR);
             }
-            PercentCodec.encode(buf, segment, charset, PercentCodec.PATH_SEGMENT, false);
+            PercentCodec.encode(buf, segment, charset, safechars, false);
             i++;
         }
     }
 
-    static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params, final Charset charset,
-                            final boolean blankAsPlus) {
+    static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
+                           final Charset charset) {
+        formatPath(buf, segments, rootless, charset, PercentCodec.UNRESERVED);
+    }
+
+    static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
+                            final Charset charset, final BitSet safechars, final boolean blankAsPlus) {
         int i = 0;
         for (final NameValuePair parameter : params) {
             if (i > 0) {
@@ -364,12 +371,17 @@
             PercentCodec.encode(buf, parameter.getName(), charset, blankAsPlus);
             if (parameter.getValue() != null) {
                 buf.append(PARAM_VALUE_SEPARATOR);
-                PercentCodec.encode(buf, parameter.getValue(), charset, blankAsPlus);
+                PercentCodec.encode(buf, parameter.getValue(), charset, safechars, blankAsPlus);
             }
             i++;
         }
     }
 
+    static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
+                            final Charset charset, final boolean blankAsPlus) {
+        formatQuery(buf, params, charset, PercentCodec.UNRESERVED, blankAsPlus);
+    }
+
     /**
      * Builds a {@link URI} instance.
      */
@@ -429,13 +441,15 @@
                 }
                 sb.append(this.encodedPath);
             } else if (this.pathSegments != null) {
-                formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset);
+                formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset,
+                        encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.PATH_SEGMENT);
             }
             if (this.encodedQuery != null) {
                 sb.append("?").append(this.encodedQuery);
             } else if (this.queryParams != null && !this.queryParams.isEmpty()) {
                 sb.append("?");
-                formatQuery(sb, this.queryParams, this.charset, false);
+                formatQuery(sb, this.queryParams, this.charset,
+                        encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.QUERY, false);
             } else if (this.query != null) {
                 sb.append("?");
                 PercentCodec.encode(sb, this.query, this.charset,


private static final int RADIX = 16;

static void encode(final StringBuilder buf, final CharSequence content, final Charset charset,
Expand Down
93 changes: 79 additions & 14 deletions httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -87,9 +88,36 @@ public static URIBuilder loopbackAddress() {
private Charset charset;
private String fragment;
private String encodedFragment;
private EncodingPolicy encodingPolicy = EncodingPolicy.ALL_RESERVED;

private boolean plusAsBlank;

/**
* Defines the encoding policy for URI components in {@link URIBuilder}.
* This enum controls how characters are percent-encoded when constructing a URI,
* allowing flexibility between strict encoding and RFC 3986-compliant behavior.
*
* @since 5.4
*/
public enum EncodingPolicy {
/**
* Encodes all reserved characters, allowing only unreserved characters
* (ALPHA, DIGIT, "-", ".", "_", "~") to remain unencoded. This is a strict
* policy suitable for conservative URI production where maximum encoding
* is desired.
*/
ALL_RESERVED,

/**
* Follows RFC 3986 component-specific encoding rules. For example, query and
* fragment components allow unreserved characters, sub-delimiters ("!", "$",
* "&", "'", "(", ")", "*", "+", ",", ";", "="), and additional characters
* (":", "@", "/", "?") to remain unencoded, as defined by {@code PercentCodec.FRAGMENT}.
* This policy ensures compliance with RFC 3986 while maintaining interoperability.
*/
RFC_3986
}

/**
* Constructs an empty instance.
*/
Expand Down Expand Up @@ -175,6 +203,22 @@ public URIBuilder setCharset(final Charset charset) {
return this;
}

/**
* Sets the encoding policy for this {@link URIBuilder}.
* The encoding policy determines how URI components (e.g., query, fragment) are
* percent-encoded when building the URI string. If not set, the default policy
* is {@link EncodingPolicy#RFC_3986}.
*
* @param encodingPolicy the encoding policy to apply, or {@code null} to reset
* to the default ({@link EncodingPolicy#ALL_RESERVED})
* @return this {@link URIBuilder} instance for method chaining
* @since 5.4
*/
public URIBuilder setEncodingPolicy(final EncodingPolicy encodingPolicy) {
this.encodingPolicy = encodingPolicy;
return this;
}

/**
* Gets the authority.
*
Expand Down Expand Up @@ -300,33 +344,46 @@ static List<String> parsePath(final CharSequence s, final Charset charset) {
return list;
}

static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless, final Charset charset) {
static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
final Charset charset, final BitSet safechars) {
int i = 0;
for (final String segment : segments) {
if (i > 0 || !rootless) {
buf.append(PATH_SEPARATOR);
}
PercentCodec.encode(buf, segment, charset);
PercentCodec.encode(buf, segment, charset, safechars, false);
i++;
}
}

static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params, final Charset charset,
final boolean blankAsPlus) {
static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
final Charset charset) {
formatPath(buf, segments, rootless, charset, PercentCodec.UNRESERVED);
}


static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
final Charset charset, final BitSet safechars, final boolean blankAsPlus) {
int i = 0;
for (final NameValuePair parameter : params) {
if (i > 0) {
buf.append(QUERY_PARAM_SEPARATOR);
}
PercentCodec.encode(buf, parameter.getName(), charset, blankAsPlus);
PercentCodec.encode(buf, parameter.getName(), charset, safechars, blankAsPlus);
if (parameter.getValue() != null) {
buf.append(PARAM_VALUE_SEPARATOR);
PercentCodec.encode(buf, parameter.getValue(), charset, blankAsPlus);
PercentCodec.encode(buf, parameter.getValue(), charset, safechars, blankAsPlus);
}
i++;
}
}

static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
final Charset charset, final boolean blankAsPlus) {
formatQuery(buf, params, charset, PercentCodec.UNRESERVED, blankAsPlus);
}


/**
* Builds a {@link URI} instance.
*/
Expand Down Expand Up @@ -356,18 +413,22 @@ private String buildString() {
} else if (this.userInfo != null) {
final int idx = this.userInfo.indexOf(':');
if (idx != -1) {
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset);
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
sb.append(':');
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset);
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
} else {
PercentCodec.encode(sb, this.userInfo, this.charset);
PercentCodec.encode(sb, this.userInfo, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
}
sb.append("@");
}
if (InetAddressUtils.isIPv6(this.host)) {
sb.append("[").append(this.host).append("]");
} else {
PercentCodec.encode(sb, this.host, this.charset);
PercentCodec.encode(sb, this.host, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false);
}
if (this.port >= 0) {
sb.append(":").append(this.port);
Expand All @@ -382,23 +443,27 @@ private String buildString() {
}
sb.append(this.encodedPath);
} else if (this.pathSegments != null) {
formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset);
formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.PATH_SEGMENT);
}
if (this.encodedQuery != null) {
sb.append("?").append(this.encodedQuery);
} else if (this.queryParams != null && !this.queryParams.isEmpty()) {
sb.append("?");
formatQuery(sb, this.queryParams, this.charset, false);
formatQuery(sb, this.queryParams, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.QUERY, false);
} else if (this.query != null) {
sb.append("?");
PercentCodec.encode(sb, this.query, this.charset, PercentCodec.URIC, false);
PercentCodec.encode(sb, this.query, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.QUERY, false);
}
}
if (this.encodedFragment != null) {
sb.append("#").append(this.encodedFragment);
} else if (this.fragment != null) {
sb.append("#");
PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.URIC, false);
PercentCodec.encode(sb, this.fragment, this.charset,
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.FRAGMENT, false);
}
return sb.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void testFormatQuery() {
@Test
void testHierarchicalUri() throws Exception {
final URI uri = new URI("http", "stuff", "localhost", 80, "/some stuff", "param=stuff", "fragment");
final URIBuilder uribuilder = new URIBuilder(uri);
final URIBuilder uribuilder = new URIBuilder(uri).setEncodingPolicy(URIBuilder.EncodingPolicy.ALL_RESERVED);
final URI result = uribuilder.build();
Assertions.assertEquals(new URI("http://stuff@localhost:80/some%20stuff?param=stuff#fragment"), result);
}
Expand Down Expand Up @@ -997,4 +997,20 @@ void testSetPlusAsBlank() throws Exception {
params = uriBuilder.getQueryParams();
Assertions.assertEquals("hello world", params.get(0).getValue());
}

@Test
void testCustomQueryEncoding() throws Exception {
final String query = "query param:!@/?\"";
final String expectedEncodedQuery = "query%20param:!@/?%22";

final URI uri = new URIBuilder()
.setScheme("http")
.setHost("example.com")
.setCustomQuery(query)
.setEncodingPolicy(URIBuilder.EncodingPolicy.RFC_3986)
.build();

Assertions.assertEquals(expectedEncodedQuery, uri.getRawQuery());
}

}