Skip to content

Commit 95f58c5

Browse files
committed
HTTPCLIENT-1843 Plug Commons-Compress into HttpClient’s automatic
content-decoding (optional) * New ContentDecoderRegistry discovers extra codecs (br, zstd, xz, lz4, …) via Commons-Compress when that jar is on the class-path; otherwise falls back to the built-ins (gzip, deflate) only. * No hard dependency added—projects that need the extra algorithms just add `commons-compress` (and helper jars like google-brotli, zstd-jni, xz-java) to their pom and HttpClient uses them automatically.
1 parent c5bd9af commit 95f58c5

10 files changed

Lines changed: 460 additions & 20 deletions

File tree

httpclient5/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@
108108
<artifactId>commons-io</artifactId>
109109
<scope>test</scope>
110110
</dependency>
111+
<dependency>
112+
<groupId>org.apache.commons</groupId>
113+
<artifactId>commons-compress</artifactId>
114+
<optional>true</optional>
115+
</dependency>
111116
</dependencies>
112117

113118
<build>

httpclient5/src/main/java/org/apache/hc/client5/http/entity/BrotliInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
@Contract(threading = ThreadingBehavior.STATELESS)
4242
public class BrotliInputStreamFactory implements InputStreamFactory {
4343

44+
/**
45+
* Canonical token for the brotli content-coding.
46+
* @since 5.6
47+
*/
48+
public static final String ENCODING = "br";
49+
50+
@Override
51+
public String getContentEncoding() {
52+
return ENCODING;
53+
}
54+
4455
/**
4556
* Default instance of {@link BrotliInputStreamFactory}.
4657
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/DeflateInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@
4141
@Contract(threading = ThreadingBehavior.STATELESS)
4242
public class DeflateInputStreamFactory implements InputStreamFactory {
4343

44+
/**
45+
* Canonical token for the deflate content-coding.
46+
* @since 5.6
47+
*/
48+
public static final String ENCODING = "deflate";
49+
50+
@Override
51+
public String getContentEncoding() {
52+
return ENCODING;
53+
}
54+
4455
/**
4556
* Default instance of {@link DeflateInputStreamFactory}.
4657
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/GZIPInputStreamFactory.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@
4242
@Contract(threading = ThreadingBehavior.STATELESS)
4343
public class GZIPInputStreamFactory implements InputStreamFactory {
4444

45+
/**
46+
* Canonical token for the gzip content-coding.
47+
* @since 5.6
48+
*/
49+
public static final String ENCODING = "gzip";
50+
51+
@Override
52+
public String getContentEncoding() {
53+
return ENCODING;
54+
}
55+
4556
/**
4657
* Default instance of {@link GZIPInputStreamFactory}.
4758
*/

httpclient5/src/main/java/org/apache/hc/client5/http/entity/InputStreamFactory.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,21 @@ public interface InputStreamFactory {
3838

3939
InputStream create(InputStream inputStream) throws IOException;
4040

41+
/**
42+
* Returns the canonical {@code Content-Encoding} token handled by this
43+
* factory (for example {@code "gzip"}, {@code "deflate"}, {@code "br"}).
44+
* <p>
45+
* Implementations that do <strong>not</strong> represent an HTTP
46+
* content-decoder should simply inherit the default implementation,
47+
* which returns an empty string.
48+
*
49+
* @return the lower-case encoding token, or an empty string when the
50+
* factory is not intended for HTTP content-decoding
51+
*
52+
* @since 5.6
53+
*/
54+
default String getContentEncoding() {
55+
return "";
56+
}
57+
4158
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
28+
package org.apache.hc.client5.http.entity.compress;
29+
30+
import java.io.IOException;
31+
import java.io.InputStream;
32+
import java.util.Locale;
33+
34+
import org.apache.commons.compress.compressors.CompressorException;
35+
import org.apache.commons.compress.compressors.CompressorStreamFactory;
36+
import org.apache.hc.client5.http.entity.InputStreamFactory;
37+
import org.apache.hc.core5.annotation.Contract;
38+
import org.apache.hc.core5.annotation.Internal;
39+
import org.apache.hc.core5.annotation.ThreadingBehavior;
40+
41+
/**
42+
* {@link InputStreamFactory} backed by
43+
* <a href="https://commons.apache.org/proper/commons-compress/">Apache&nbsp;Commons Compress</a>.
44+
* <p>
45+
* The class is compiled against Commons Compress but lives behind an <i>optional</i>
46+
* dependency. At run-time it will only be loaded when the library is present,
47+
* therefore callers may rely on it without pulling Commons Compress into every
48+
* downstream build.
49+
* </p>
50+
*
51+
* <h4>Run-time guards</h4>
52+
* Some encodings (e.g.&nbsp;{@code br}, {@code zstd}, {@code xz/lzma})
53+
* depend on native helper JARs. {@link #runtimeAvailable(String)} performs a
54+
* lightweight {@code Class.forName} probe so we register such codecs only when
55+
* the helper is on the class-path.
56+
*
57+
* @since 5.6
58+
*/
59+
@Internal
60+
@Contract(threading = ThreadingBehavior.STATELESS)
61+
final class CommonsCompressDecoderFactory implements InputStreamFactory {
62+
63+
private final String encoding;
64+
65+
CommonsCompressDecoderFactory(final String encoding) {
66+
this.encoding = encoding.toLowerCase(Locale.ROOT);
67+
}
68+
69+
@Override
70+
public String getContentEncoding() {
71+
return encoding;
72+
}
73+
74+
@Override
75+
public InputStream create(final InputStream source) throws IOException {
76+
try {
77+
return new CompressorStreamFactory()
78+
.createCompressorInputStream(encoding, source);
79+
} catch (final CompressorException | LinkageError ex) {
80+
throw new IOException(
81+
"Unable to decode Content-Encoding '" + encoding + '\'', ex);
82+
}
83+
}
84+
85+
private enum Probe {
86+
BR(ContentCoding.BROTLI.token(), "org.brotli.dec.BrotliInputStream"),
87+
ZSTD(ContentCoding.ZSTD.token(), "com.github.luben.zstd.ZstdInputStream"),
88+
XZ(ContentCoding.XZ.token(), "org.tukaani.xz.XZInputStream"),
89+
LZMA(ContentCoding.LZMA.token(), "org.tukaani.xz.XZInputStream");
90+
91+
final String enc, probeClass;
92+
93+
Probe(final String enc, final String probeClass) {
94+
this.enc = enc;
95+
this.probeClass = probeClass;
96+
}
97+
98+
static String helperFor(final String enc) {
99+
for (final Probe p : values()) {
100+
if (p.enc.equals(enc)) {
101+
return p.probeClass;
102+
}
103+
}
104+
return null;
105+
}
106+
}
107+
108+
static boolean runtimeAvailable(final String enc) {
109+
final String probe = Probe.helperFor(enc.toLowerCase(Locale.ROOT));
110+
if (probe == null) {
111+
return true;
112+
}
113+
try {
114+
Class.forName(
115+
probe, false,
116+
CommonsCompressDecoderFactory.class.getClassLoader());
117+
return true;
118+
} catch (final ClassNotFoundException | LinkageError ex) {
119+
return false;
120+
}
121+
}
122+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* ====================================================================
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
* ====================================================================
20+
*
21+
* This software consists of voluntary contributions made by many
22+
* individuals on behalf of the Apache Software Foundation. For more
23+
* information on the Apache Software Foundation, please see
24+
* <http://www.apache.org/>.
25+
*
26+
*/
27+
28+
package org.apache.hc.client5.http.entity.compress;
29+
30+
/**
 * Content-coding tokens that HttpClient knows about for HTTP request and
 * response bodies.
 * <p>
 * Every constant carries the lower-case token string as it is written in the
 * {@code Content-Encoding} and {@code Accept-Encoding} headers. Several of the
 * codings (for instance Brotli, Zstandard, XZ/LZMA) may need extra helper
 * libraries to be present at runtime.
 *
 * @since 5.6
 */
public enum ContentCoding {

    /** GZIP compression format ({@code gzip}). */
    GZIP("gzip"),

    /** "deflate" compression format, zlib or raw ({@code deflate}). */
    DEFLATE("deflate"),

    /** Legacy alias for GZIP ({@code x-gzip}). */
    X_GZIP("x-gzip"),

    // Codecs below are optional: they need Commons-Compress or native helpers.

    /** Brotli compression format ({@code br}). */
    BROTLI("br"),

    /** Zstandard compression format ({@code zstd}). */
    ZSTD("zstd"),

    /** XZ compression format ({@code xz}). */
    XZ("xz"),

    /** LZMA compression format ({@code lzma}). */
    LZMA("lzma"),

    /** Framed LZ4 compression format ({@code lz4-framed}). */
    LZ4_FRAMED("lz4-framed"),

    /** Block LZ4 compression format ({@code lz4-block}). */
    LZ4_BLOCK("lz4-block"),

    /** BZIP2 compression format ({@code bzip2}). */
    BZIP2("bzip2"),

    /** Pack200 compression format ({@code pack200}). */
    PACK200("pack200"),

    /** Deflate64 compression format ({@code deflate64}). */
    DEFLATE64("deflate64");

    /** Header token associated with this constant. */
    private final String value;

    ContentCoding(final String value) {
        this.value = value;
    }

    /**
     * Gives the token string associated with this content-coding.
     *
     * @return the lowercase token used in HTTP headers
     */
    public String token() {
        return this.value;
    }
}

0 commit comments

Comments
 (0)