From ddf093ca2e9c83a6d7a063c58bf21c36394ec49f Mon Sep 17 00:00:00 2001 From: Arturo Bernal Date: Fri, 10 Oct 2025 14:46:22 +0200 Subject: [PATCH 1/2] RFC6874 zone IDs with minimal parsing Bracket/encode only; treat IPv6 literal opaquely, decode/validate ZoneID; keep colon-count heuristic. --- .../org/apache/hc/core5/http/HttpHost.java | 20 +- .../org/apache/hc/core5/net/URIAuthority.java | 112 +++++++++- .../org/apache/hc/core5/net/URIBuilder.java | 12 +- .../apache/hc/core5/net/ZoneIdSupport.java | 205 ++++++++++++++++++ .../org/apache/hc/core5/util/TextUtils.java | 17 ++ .../hc/core5/net/TestInetAddressUtils.java | 68 ++++++ 6 files changed, 419 insertions(+), 15 deletions(-) create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/ZoneIdSupport.java diff --git a/httpcore5/src/main/java/org/apache/hc/core5/http/HttpHost.java b/httpcore5/src/main/java/org/apache/hc/core5/http/HttpHost.java index 018c543b3b..daf9e5d288 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/http/HttpHost.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/http/HttpHost.java @@ -38,6 +38,7 @@ import org.apache.hc.core5.net.Host; import org.apache.hc.core5.net.NamedEndpoint; import org.apache.hc.core5.net.URIAuthority; +import org.apache.hc.core5.net.ZoneIdSupport; import org.apache.hc.core5.util.Args; import org.apache.hc.core5.util.LangUtils; import org.apache.hc.core5.util.TextUtils; @@ -303,13 +304,24 @@ public InetAddress getAddress() { */ public String toURI() { final StringBuilder buffer = new StringBuilder(); - buffer.append(this.schemeName); - buffer.append("://"); - buffer.append(this.host.toString()); + buffer.append(this.schemeName).append("://"); + + final String hostname = this.host.getHostName(); + final int port = this.host.getPort(); + + // Bracket only real IPv6 literals; decide using the address part only (ignore zone) + if (ZoneIdSupport.looksLikeIPv6AddressPart(hostname)) { + ZoneIdSupport.appendBracketedIPv6(buffer, hostname); + if (port >= 0) { + buffer.append(':').append(port); + } + } else { + // reg-name / IPv4 / special forms like "host:80" for CONNECT + buffer.append(this.host); + } return buffer.toString(); } - /** * Obtains the host string, without scheme prefix. * diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java index 86288ebac9..6e455748ae 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java @@ -51,9 +51,19 @@ public final class URIAuthority implements NamedEndpoint, Serializable { private final String userInfo; private final Host host; + static URIAuthority parse(final CharSequence s) throws URISyntaxException { + if (TextUtils.isBlank(s)) { + return null; + } + final Tokenizer.Cursor cursor = new Tokenizer.Cursor(0, s.length()); + return parse(s, cursor); // intentionally no cursor.atEnd() check + } + static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) throws URISyntaxException { final Tokenizer tokenizer = Tokenizer.INSTANCE; String userInfo = null; + + // optional userinfo@ final int initPos = cursor.getPos(); final String token = tokenizer.parseContent(s, cursor, URISupport.HOST_DELIMITERS); if (!cursor.atEnd() && s.charAt(cursor.getPos()) == '@') { @@ -62,26 +72,112 @@ static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) t userInfo = token; } } else { - //Rewind cursor.updatePos(initPos); } + + if (!cursor.atEnd() && s.charAt(cursor.getPos()) == '[') { + final int lb = cursor.getPos(); + final int upper = cursor.getUpperBound(); + int rb = -1; + for (int i = lb + 1; i < upper; i++) { + if (s.charAt(i) == ']') { + rb = i; + break; + } + } + if (rb < 0) { + throw URISupport.createException(s.toString(), cursor, "Expected closing bracket for IPv6 address"); + } + + final String literal = s.subSequence(lb + 1, rb).toString(); + final int zoneMark = literal.indexOf("%25"); + final String addrPart = zoneMark >= 0 ? literal.substring(0, zoneMark) : literal; + + int colons = 0; + for (int i = 0; i < addrPart.length(); i++) { + if (addrPart.charAt(i) == ':') { + if (++colons >= 2) { + break; + } + } + } + if (colons < 2) { + throw URISupport.createException(s.toString(), cursor, "Expected an IPv6 address"); + } + + if (zoneMark >= 0) { + final String zoneEnc = literal.substring(zoneMark + 3); + ZoneIdSupport.validateZoneIdEncoded(zoneEnc); + } + // Store host in friendly form: "...%" (or literal as-is if no zone) + final String hostName = ZoneIdSupport.decodeZoneId(literal); + + // optional :port + int pos = rb + 1; + int port = -1; + if (pos < upper && s.charAt(pos) == ':') { + pos++; + if (pos >= upper || !Character.isDigit(s.charAt(pos))) { + throw URISupport.createException(s.toString(), cursor, "Invalid port"); + } + long acc = 0; + while (pos < upper && Character.isDigit(s.charAt(pos))) { + acc = acc * 10 + (s.charAt(pos) - '0'); + if (acc > 65535) { + throw URISupport.createException(s.toString(), cursor, "Port out of range"); + } + pos++; + } + port = (int) acc; + } + cursor.updatePos(pos); + return new URIAuthority(userInfo, hostName, port); + } + + { + final int start = cursor.getPos(); + final int upper = cursor.getUpperBound(); + int i = start; + int colonCount = 0; + while (i < upper) { + final char ch = s.charAt(i); + if (ch == '/' || ch == '?' || ch == '#') { + break; // end of authority + } + if (ch == ']') { + break; // safety + } + if (ch == ':') { + if (++colonCount > 1) { + throw URISupport.createException(s.toString(), cursor, "Expected an IPv6 address"); + } + } + i++; + } + } + final Host host = Host.parse(s, cursor); return new URIAuthority(userInfo, host); } - static URIAuthority parse(final CharSequence s) throws URISyntaxException { - final Tokenizer.Cursor cursor = new Tokenizer.Cursor(0, s.length()); - return parse(s, cursor); - } static void format(final StringBuilder buf, final URIAuthority uriAuthority) { if (uriAuthority.getUserInfo() != null) { - buf.append(uriAuthority.getUserInfo()); - buf.append("@"); + buf.append(uriAuthority.getUserInfo()).append("@"); + } + final String hostName = uriAuthority.getHostName(); + final int port = uriAuthority.getPort(); + + if (ZoneIdSupport.appendBracketedIPv6(buf, hostName)) { + if (port >= 0) { + buf.append(':').append(port); + } + } else { + Host.format(buf, uriAuthority); } - Host.format(buf, uriAuthority); } + static String format(final URIAuthority uriAuthority) { final StringBuilder buf = new StringBuilder(); format(buf, uriAuthority); diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java index cbc41993ce..febe0f5cbd 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java @@ -424,14 +424,16 @@ private String buildString() { } sb.append("@"); } - if (InetAddressUtils.isIPv6(this.host)) { - sb.append("[").append(this.host).append("]"); + + // Bracket only true IPv6 hosts; decide based on address part only (ignore zone) + if (ZoneIdSupport.appendBracketedIPv6(sb, this.host)) { + // wrote [IPv6%25zone] } else { PercentCodec.encode(sb, this.host, this.charset, encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false); } if (this.port >= 0) { - sb.append(":").append(this.port); + sb.append(':').append(this.port); } authoritySpecified = true; } else { @@ -478,6 +480,10 @@ private void digestURI(final URI uri, final Charset charset) { this.host = uriHost != null && InetAddressUtils.isIPv6URLBracketed(uriHost) ? uriHost.substring(1, uriHost.length() - 1) : uriHost; + + // Normalize zone-id to user-friendly form: "...%25zone" -> "...%zone" (and decode %HH in zone) + this.host = ZoneIdSupport.decodeZoneId(this.host); + this.port = uri.getPort(); this.encodedUserInfo = uri.getRawUserInfo(); this.userInfo = uri.getUserInfo(); diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/ZoneIdSupport.java b/httpcore5/src/main/java/org/apache/hc/core5/net/ZoneIdSupport.java new file mode 100644 index 0000000000..2b644b15cc --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/ZoneIdSupport.java @@ -0,0 +1,205 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ +package org.apache.hc.core5.net; + +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.util.TextUtils; + +@Internal +public final class ZoneIdSupport { + + private ZoneIdSupport() { + } + + /** + * RFC 6874 encoder for ZoneID: emits unreserved characters as-is and percent-encodes + * everything else using UTF-8 with UPPERCASE hex digits. Existing %HH triplets are + * passed through unchanged. + */ + public static String encodeZoneIdRfc6874(final CharSequence raw) { + if (raw == null || raw.length() == 0) { + return raw != null ? raw.toString() : null; + } + final StringBuilder out = new StringBuilder(raw.length() + 8); + for (int i = 0; i < raw.length(); i++) { + final char ch = raw.charAt(i); + if (unreserved(ch)) { + out.append(ch); + } else if (ch == '%' && i + 2 < raw.length() + && TextUtils.isHex(raw.charAt(i + 1)) && TextUtils.isHex(raw.charAt(i + 2))) { + // pass through existing %HH + out.append('%').append(raw.charAt(i + 1)).append(raw.charAt(i + 2)); + i += 2; + } else { + final byte[] bytes = String.valueOf(ch).getBytes(java.nio.charset.StandardCharsets.UTF_8); + final String hex = org.apache.hc.core5.util.TextUtils.toHexString(bytes) + .toUpperCase(java.util.Locale.ROOT); + for (int k = 0; k < hex.length(); k += 2) { + out.append('%').append(hex.charAt(k)).append(hex.charAt(k + 1)); + } + } + } + return out.toString(); + } + + /** + * RFC 6874 decoder for bracket contents of an IPv6 literal. + * Input: {@code "addr%25"} → Output internal form: {@code "addr%"}. + * If there is no {@code "%25"} delimiter, returns the input as-is. + */ + public static String decodeZoneId(final CharSequence host) { + if (host == null) { + return null; + } + // find "%25" + int p = -1; + for (int i = 0; i + 2 < host.length(); i++) { + if (host.charAt(i) == '%' && host.charAt(i + 1) == '2' && host.charAt(i + 2) == '5') { + p = i; + break; + } + } + if (p < 0) { + return host.toString(); + } + final CharSequence addrCs = host.subSequence(0, p); + final CharSequence encZone = host.subSequence(p + 3, host.length()); + + final java.io.ByteArrayOutputStream baos = + new java.io.ByteArrayOutputStream(encZone.length()); + for (int i = 0; i < encZone.length(); i++) { + final char ch = encZone.charAt(i); + if (ch == '%' && i + 2 < encZone.length() + && TextUtils.isHex(encZone.charAt(i + 1)) && TextUtils.isHex(encZone.charAt(i + 2))) { + final int hi = Character.digit(encZone.charAt(i + 1), 16); + final int lo = Character.digit(encZone.charAt(i + 2), 16); + baos.write((hi << 4) + lo); + i += 2; + } else { + // Allowed unreserved in ZoneID are ASCII; copy as single byte + baos.write((byte) ch); + } + } + final String zone = new String(baos.toByteArray(), java.nio.charset.StandardCharsets.UTF_8); + return addrCs.toString() + '%' + zone; + } + + /** + * RFC 6874 ZoneID validator: + *
ZoneID = 1*( unreserved / pct-encoded )
+ * Throws {@link IllegalArgumentException} on invalid input. + */ + public static void validateZoneIdEncoded(final CharSequence enc) { + if (enc == null || enc.length() == 0) { + throw new IllegalArgumentException("ZoneID must not be empty"); + } + for (int i = 0; i < enc.length(); i++) { + final char ch = enc.charAt(i); + if (unreserved(ch)) { + continue; + } + if (ch == '%' && i + 2 < enc.length() + && TextUtils.isHex(enc.charAt(i + 1)) && TextUtils.isHex(enc.charAt(i + 2))) { + i += 2; + continue; + } + throw new IllegalArgumentException("Illegal character in ZoneID"); + } + } + + /** + * Heuristic: returns {@code true} if {@code host} looks like an IPv6 address-part + * (i.e., before any ZoneID) by counting colons. We do not parse/validate IPv6; + * this keeps our surface minimal while still bracketing correctly. + *

Rule: if the address-part (up to '%', if present) contains >= 2 colons, + * treat it as IPv6-like.

+ */ + public static boolean looksLikeIPv6AddressPart(final CharSequence host) { + if (host == null) { + return false; + } + int end = host.length(); + for (int i = 0; i < end; i++) { + if (host.charAt(i) == '%') { + end = i; + break; + } + } + int colons = 0; + for (int i = 0; i < end; i++) { + if (host.charAt(i) == ':') { + colons++; + if (colons >= 2) { + return true; + } + } + } + return false; + } + + /** + * Appends a bracketed IPv6 literal to {@code buf} if {@code host} looks like IPv6. + * If a ZoneID is present (after '%'), it is written as {@code "%25"} followed by the + * RFC 6874-encoded ZoneID. Returns {@code true} iff it wrote the bracketed literal. + */ + public static boolean appendBracketedIPv6(final StringBuilder buf, final CharSequence host) { + if (!looksLikeIPv6AddressPart(host)) { + return false; + } + // address part + int zoneIdx = -1; + for (int i = 0; i < host.length(); i++) { + if (host.charAt(i) == '%') { + zoneIdx = i; + break; + } + } + buf.append('['); + if (zoneIdx >= 0) { + buf.append(host, 0, zoneIdx); + } else { + buf.append(host); + } + // zone part + if (zoneIdx >= 0) { + final CharSequence zone = host.subSequence(zoneIdx + 1, host.length()); + buf.append("%25").append(encodeZoneIdRfc6874(zone)); + } + buf.append(']'); + return true; + } + + /** + * RFC 3986 unreserved characters. + */ + private static boolean unreserved(final char ch) { + return ch >= 'A' && ch <= 'Z' + || ch >= 'a' && ch <= 'z' + || ch >= '0' && ch <= '9' + || ch == '-' || ch == '.' || ch == '_' || ch == '~'; + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java b/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java index dffeaf58f3..e36ab710d3 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/util/TextUtils.java @@ -188,4 +188,21 @@ public static byte castAsByte(final int c) { return '?'; } + /** + * Tests whether the given character is an ASCII hexadecimal digit. + *

+ * Accepts {@code '0'..'9'}, {@code 'A'..'F'}, and {@code 'a'..'f'} only. + * This method does not consider non-ASCII numerals or fullwidth forms. + * + * @param c the character to test + * @return {@code true} if {@code c} is an ASCII hex digit, {@code false} otherwise + * @since 5.4 + */ + public static boolean isHex(final char c) { + return c >= '0' && c <= '9' + || c >= 'A' && c <= 'F' + || c >= 'a' && c <= 'f'; + } + + } diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/TestInetAddressUtils.java b/httpcore5/src/test/java/org/apache/hc/core5/net/TestInetAddressUtils.java index 3471c2d92c..3c1541fce3 100644 --- a/httpcore5/src/test/java/org/apache/hc/core5/net/TestInetAddressUtils.java +++ b/httpcore5/src/test/java/org/apache/hc/core5/net/TestInetAddressUtils.java @@ -27,6 +27,10 @@ package org.apache.hc.core5.net; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hc.core5.http.HttpHost; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -215,5 +219,69 @@ void testInvalidIPv4MappedIPv6AddressWithBadOctets() { Assertions.assertFalse(InetAddressUtils.isIPv4MappedIPv6("::ffff:0255.000.000.000")); } + @Test + void parseAuthorityWithZoneId_decodesDelimiter() throws URISyntaxException { + final URIAuthority a = URIAuthority.parse("[fe80::1%25eth0]:8080"); + Assertions.assertNotNull(a); + Assertions.assertEquals("fe80::1%eth0", a.getHostName()); + Assertions.assertEquals(8080, a.getPort()); + Assertions.assertNull(a.getUserInfo()); + } + + @Test + void formatAuthorityWithZoneId_emitsPercent25() { + final URIAuthority a = new URIAuthority(null, "fe80::1%eth0", 8080); + Assertions.assertEquals("[fe80::1%25eth0]:8080", a.toString()); + } + + @Test + void httpHost_toURI_formatsZoneId() { + final HttpHost h = new HttpHost("http", "fe80::1%eth0", 8080); + Assertions.assertEquals("http://[fe80::1%25eth0]:8080", h.toURI()); + } + + @Test + void uriBuilder_roundTrip_zoneId() throws Exception { + final URI u = new URI("http://[fe80::1%25eth0]:8080/path?q=1"); + final URIBuilder b = new URIBuilder(u); + Assertions.assertEquals("fe80::1%eth0", b.getHost()); + final URI rebuilt = b.build(); + Assertions.assertEquals("http://[fe80::1%25eth0]:8080/path?q=1", rebuilt.toASCIIString()); + } + + @Test + void zoneId_validation_rejects_bad_pct() { + // empty zone — invalid + Assertions.assertThrows(IllegalArgumentException.class, + () -> URIAuthority.parse("[fe80::1%25]:80")); + + // dangling percent-triplet — invalid + Assertions.assertThrows(IllegalArgumentException.class, + () -> URIAuthority.parse("[fe80::1%25%]:80")); + + // non-hex in percent-triplet — invalid + Assertions.assertThrows(IllegalArgumentException.class, + () -> URIAuthority.parse("[fe80::1%25%G1]:80")); + + // character not in RFC 3986 "unreserved" — invalid + Assertions.assertThrows(IllegalArgumentException.class, + () -> URIAuthority.parse("[fe80::1%25!]:80")); + + } + + @Test + void zoneId_allows_unreserved_and_pct() throws URISyntaxException { + final URIAuthority a = URIAuthority.parse("[fe80::1%25en1-._~x%20]:443"); + Assertions.assertNotNull(a); + Assertions.assertEquals("fe80::1%en1-._~x ", a.getHostName()); + Assertions.assertEquals("[fe80::1%25en1-._~x%20]:443", a.toString()); + } + + @Test + void inetAddressUtils_helper_accepts_zone() { + Assertions.assertTrue(ZoneIdSupport.looksLikeIPv6AddressPart("fe80::1%eth0")); + Assertions.assertTrue(ZoneIdSupport.looksLikeIPv6AddressPart("fe80::1234:0:0:0:0:0%en1")); + Assertions.assertFalse(ZoneIdSupport.looksLikeIPv6AddressPart("not-an-ip")); + } } From 54d209954217510c57873ca73db31ec8513ffde4 Mon Sep 17 00:00:00 2001 From: Arturo Bernal Date: Sat, 11 Oct 2025 21:06:48 +0200 Subject: [PATCH 2/2] remove ipv6 parse --- .../org/apache/hc/core5/net/URIAuthority.java | 49 ++++--------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java index 6e455748ae..36c076d5e6 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIAuthority.java @@ -80,37 +80,28 @@ static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) t final int upper = cursor.getUpperBound(); int rb = -1; for (int i = lb + 1; i < upper; i++) { - if (s.charAt(i) == ']') { - rb = i; - break; - } + if (s.charAt(i) == ']') { rb = i; break; } } if (rb < 0) { throw URISupport.createException(s.toString(), cursor, "Expected closing bracket for IPv6 address"); } - final String literal = s.subSequence(lb + 1, rb).toString(); - final int zoneMark = literal.indexOf("%25"); - final String addrPart = zoneMark >= 0 ? literal.substring(0, zoneMark) : literal; + final int z = literal.indexOf("%25"); + final String addrPart = z >= 0 ? literal.substring(0, z) : literal; + // Minimal check: IPv6-like must have at least two colons int colons = 0; for (int i = 0; i < addrPart.length(); i++) { - if (addrPart.charAt(i) == ':') { - if (++colons >= 2) { - break; - } - } + if (addrPart.charAt(i) == ':' && ++colons >= 2) break; } if (colons < 2) { throw URISupport.createException(s.toString(), cursor, "Expected an IPv6 address"); } - if (zoneMark >= 0) { - final String zoneEnc = literal.substring(zoneMark + 3); - ZoneIdSupport.validateZoneIdEncoded(zoneEnc); + if (z >= 0) { + ZoneIdSupport.validateZoneIdEncoded(literal.substring(z + 3)); } - // Store host in friendly form: "...%" (or literal as-is if no zone) - final String hostName = ZoneIdSupport.decodeZoneId(literal); + final String hostName = ZoneIdSupport.decodeZoneId(literal); // "...%25zone" → "...%zone" // optional :port int pos = rb + 1; @@ -134,33 +125,13 @@ static URIAuthority parse(final CharSequence s, final Tokenizer.Cursor cursor) t return new URIAuthority(userInfo, hostName, port); } - { - final int start = cursor.getPos(); - final int upper = cursor.getUpperBound(); - int i = start; - int colonCount = 0; - while (i < upper) { - final char ch = s.charAt(i); - if (ch == '/' || ch == '?' || ch == '#') { - break; // end of authority - } - if (ch == ']') { - break; // safety - } - if (ch == ':') { - if (++colonCount > 1) { - throw URISupport.createException(s.toString(), cursor, "Expected an IPv6 address"); - } - } - i++; - } - } - + // Non-bracketed authority → existing fallback. final Host host = Host.parse(s, cursor); return new URIAuthority(userInfo, host); } + static void format(final StringBuilder buf, final URIAuthority uriAuthority) { if (uriAuthority.getUserInfo() != null) { buf.append(uriAuthority.getUserInfo()).append("@");