From 6294189e90b2cc20c6d331100af6e053725036cd Mon Sep 17 00:00:00 2001 From: Arturo Bernal Date: Tue, 25 Feb 2025 09:01:13 +0100 Subject: [PATCH] HTTPCLIENT-2360 - Enhance filename encoding in multipart/form-data per RFC 6266/5987 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Modified FormBodyPartBuilder to support HttpMultipartMode, adding filename* with UTF-8 encoding for non-ISO-8859-1 filenames in STRICT/EXTENDED modes, skipping it in LEGACY mode. - Updated HttpRFC7578Multipart to use mode for filename encoding: percent-encode in EXTENDED, ISO-8859-1 in STRICT/LEGACY, and always encode filename* per RFC 5987. - Adjusted MultipartEntityBuilder to propagate mode to FormBodyPartBuilder, ensuring consistent behavior across the pipeline. - Fixed tests to align with mode-specific expectations, maintaining LEGACY mode’s raw UTF-8 filename behavior. --- .../http/entity/mime/FormBodyPartBuilder.java | 76 +++++++++++++++++-- .../entity/mime/HttpRFC7578Multipart.java | 29 ++++--- .../entity/mime/MultipartEntityBuilder.java | 2 +- .../http/entity/mime/TestMultipartForm.java | 4 +- 4 files changed, 94 insertions(+), 17 deletions(-) diff --git a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/FormBodyPartBuilder.java b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/FormBodyPartBuilder.java index 72405cfc84..31d73cccbc 100644 --- a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/FormBodyPartBuilder.java +++ b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/FormBodyPartBuilder.java @@ -27,12 +27,15 @@ package org.apache.hc.client5.http.entity.mime; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import org.apache.hc.core5.http.ContentType; import org.apache.hc.core5.http.NameValuePair; import org.apache.hc.core5.http.message.BasicNameValuePair; +import org.apache.hc.core5.net.PercentCodec; import org.apache.hc.core5.util.Args; import org.apache.hc.core5.util.Asserts; @@ -47,22 +50,52 @@ public class FormBodyPartBuilder { private ContentBody body; private final Header header; + /** + * The multipart mode determining how filenames are encoded in the {@code Content-Disposition} + * header, defaults to {@link HttpMultipartMode#STRICT}. + * + * @since 5.5 + */ + private HttpMultipartMode mode; + + /** + * Encoder used to check if strings can be encoded in ISO-8859-1, supporting filename + * compatibility determinations in multipart form data. + */ + private CharsetEncoder iso8859_1Encoder; + + /** + * Creates a new builder instance with the specified name, content body, and multipart mode. + * + * @param name the name of the form field + * @param body the content body of the part + * @param mode the {@link HttpMultipartMode} to use, determining filename encoding behavior; + * + * @return a new {@code FormBodyPartBuilder} instance + * @since 5.5 + */ + public static FormBodyPartBuilder create(final String name, final ContentBody body, final HttpMultipartMode mode) { + return new FormBodyPartBuilder(name, body, mode); + } + public static FormBodyPartBuilder create(final String name, final ContentBody body) { - return new FormBodyPartBuilder(name, body); + return new FormBodyPartBuilder(name, body, HttpMultipartMode.STRICT); } public static FormBodyPartBuilder create() { return new FormBodyPartBuilder(); } - FormBodyPartBuilder(final String name, final ContentBody body) { + FormBodyPartBuilder(final String name, final ContentBody body, final HttpMultipartMode mode) { this(); this.name = name; this.body = body; + this.mode = mode != null ? mode : HttpMultipartMode.STRICT; } FormBodyPartBuilder() { this.header = new Header(); + this.mode = HttpMultipartMode.STRICT; } public FormBodyPartBuilder setName(final String name) { @@ -102,6 +135,35 @@ public FormBodyPartBuilder removeFields(final String name) { return this; } + /** + * Determines whether the given string can be encoded in ISO-8859-1 without loss of data. + * This is used to decide whether the {@code filename} parameter can be used as-is or if + * the {@code filename*} parameter is needed for non-ISO-8859-1 characters. + * + * @param input the string to check, must not be {@code null} + * @return {@code true} if the string can be encoded in ISO-8859-1, {@code false} otherwise + * @since 5.5 + */ + private boolean canEncodeToISO8859_1(final String input) { + if (iso8859_1Encoder == null) { + iso8859_1Encoder = StandardCharsets.ISO_8859_1.newEncoder(); + } + return iso8859_1Encoder.canEncode(input); + } + + /** + * Encodes the given filename according to RFC 5987, prefixing it with {@code UTF-8''} and + * applying percent-encoding to non-ASCII characters. This is used for the {@code filename*} + * parameter in the {@code Content-Disposition} header when non-ISO-8859-1 characters are present. + * + * @param filename the filename to encode, must not be {@code null} + * @return the RFC 5987-encoded string, e.g., {@code UTF-8''example%20text} + * @since 5.5 + */ + private static String encodeRFC5987(final String filename) { + return "UTF-8''" + PercentCodec.RFC5987.encode(filename); + } + public FormBodyPart build() { Asserts.notBlank(this.name, "Name"); Asserts.notNull(this.body, "Content body"); @@ -114,7 +176,12 @@ public FormBodyPart build() { final List fieldParameters = new ArrayList<>(); fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_NAME, this.name)); if (this.body.getFilename() != null) { - fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME, this.body.getFilename())); + final String filename = this.body.getFilename(); + fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME, filename)); + // Add filename* only if non-ISO-8859-1 and not in LEGACY mode + if (mode != HttpMultipartMode.LEGACY && !canEncodeToISO8859_1(filename)) { + fieldParameters.add(new BasicNameValuePair(MimeConsts.FIELD_PARAM_FILENAME_START, encodeRFC5987(filename))); + } } headerCopy.addField(new MimeField(MimeConsts.CONTENT_DISPOSITION, "form-data", fieldParameters)); } @@ -139,5 +206,4 @@ public FormBodyPart build() { } return new FormBodyPart(this.name, this.body, headerCopy); } - -} +} \ No newline at end of file diff --git a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/HttpRFC7578Multipart.java b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/HttpRFC7578Multipart.java index ff42b7746a..3e5fd010d2 100644 --- a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/HttpRFC7578Multipart.java +++ b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/HttpRFC7578Multipart.java @@ -40,6 +40,8 @@ class HttpRFC7578Multipart extends AbstractMultipartFormat { private final List parts; + private final HttpMultipartMode mode; + /** * Constructs a new instance of {@code HttpRFC7578Multipart} with the given charset, boundary, parts, preamble, and epilogue. * @@ -54,9 +56,11 @@ public HttpRFC7578Multipart( final String boundary, final List parts, final String preamble, - final String epilogue) { + final String epilogue, + final HttpMultipartMode mode) { super(charset, boundary, preamble, epilogue); this.parts = parts; + this.mode = mode != null ? mode : HttpMultipartMode.STRICT; // Default to STRICT } /** @@ -69,10 +73,12 @@ public HttpRFC7578Multipart( public HttpRFC7578Multipart( final Charset charset, final String boundary, - final List parts) { - this(charset,boundary,parts,null, null); + final List parts, + final HttpMultipartMode mode) { + this(charset,boundary,parts,null, null, mode); } + @Override public List getParts() { return parts; @@ -94,12 +100,17 @@ protected void formatMultipartHeader(final MultipartPart part, final OutputStrea writeBytes(name, out); writeBytes("=\"", out); if (value != null) { - if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME) || - name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START)) { - final String encodedValue = name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START) ? - "UTF-8''" + PercentCodec.RFC5987.encode(value) : PercentCodec.RFC5987.encode(value); - final byte[] encodedBytes = encodedValue.getBytes(StandardCharsets.US_ASCII); - out.write(encodedBytes); + if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME_START)) { + final String encodedValue = "UTF-8''" + PercentCodec.RFC5987.encode(value); + writeBytes(encodedValue, StandardCharsets.US_ASCII, out); + } else if (name.equalsIgnoreCase(MimeConsts.FIELD_PARAM_FILENAME)) { + if (mode == HttpMultipartMode.EXTENDED) { + final String encodedValue = PercentCodec.RFC5987.encode(value); + writeBytes(encodedValue, StandardCharsets.US_ASCII, out); + } else { + // Default to ISO-8859-1 for RFC 7578 compliance in STRICT/LEGACY + writeBytes(value, StandardCharsets.ISO_8859_1, out); + } } else { writeBytes(value, out); } diff --git a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/MultipartEntityBuilder.java b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/MultipartEntityBuilder.java index d9574361bd..d86ec95ca1 100644 --- a/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/MultipartEntityBuilder.java +++ b/httpclient5/src/main/java/org/apache/hc/client5/http/entity/mime/MultipartEntityBuilder.java @@ -289,7 +289,7 @@ MultipartFormEntity buildEntity() { if (charsetCopy == null) { charsetCopy = StandardCharsets.UTF_8; } - form = new HttpRFC7578Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue); + form = new HttpRFC7578Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue, modeCopy); } else { form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, multipartPartsCopy, preamble, epilogue); } diff --git a/httpclient5/src/test/java/org/apache/hc/client5/http/entity/mime/TestMultipartForm.java b/httpclient5/src/test/java/org/apache/hc/client5/http/entity/mime/TestMultipartForm.java index 054b3ade53..617092ea10 100644 --- a/httpclient5/src/test/java/org/apache/hc/client5/http/entity/mime/TestMultipartForm.java +++ b/httpclient5/src/test/java/org/apache/hc/client5/http/entity/mime/TestMultipartForm.java @@ -295,11 +295,11 @@ void testMultipartFormBrowserCompatibleNonASCIIHeaders() throws Exception { @SuppressWarnings("resource") final FormBodyPart p1 = FormBodyPartBuilder.create( "field1", - new InputStreamBody(new FileInputStream(tmpfile), s1 + ".tmp")).build(); + new InputStreamBody(new FileInputStream(tmpfile), s1 + ".tmp"), HttpMultipartMode.LEGACY).build(); @SuppressWarnings("resource") final FormBodyPart p2 = FormBodyPartBuilder.create( "field2", - new InputStreamBody(new FileInputStream(tmpfile), s2 + ".tmp")).build(); + new InputStreamBody(new FileInputStream(tmpfile), s2 + ".tmp"), HttpMultipartMode.LEGACY).build(); final LegacyMultipart multipart = new LegacyMultipart( StandardCharsets.UTF_8, "foo", Arrays.asList(p1, p2));