diff --git a/dotCMS/src/main/java/com/dotcms/rest/api/v1/temp/TempFileAPI.java b/dotCMS/src/main/java/com/dotcms/rest/api/v1/temp/TempFileAPI.java index 15ff0f3cdad0..fb7366070670 100644 --- a/dotCMS/src/main/java/com/dotcms/rest/api/v1/temp/TempFileAPI.java +++ b/dotCMS/src/main/java/com/dotcms/rest/api/v1/temp/TempFileAPI.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.liferay.portal.model.User; import com.liferay.portal.util.PortalUtil; @@ -42,7 +43,9 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -72,6 +75,48 @@ public class TempFileAPI { private static final String WHO_CAN_USE_TEMP_FILE = "whoCanUse.tmp"; private static final String TEMP_RESOURCE_BY_URL_ADMIN_ONLY="TEMP_RESOURCE_BY_URL_ADMIN_ONLY"; private static final Lazy allowAccessToPrivateSubnets = Lazy.of(()->Config.getBooleanProperty("ALLOW_ACCESS_TO_PRIVATE_SUBNETS", false)); + + /** + * Builds a map of browser-compatible HTTP request headers for remote URL downloads. + * Many servers (CDNs, Cloudflare-protected sites, etc.) reject requests that lack + * standard browser headers, returning 403 or 406 responses. + * + *

All headers are read from {@link Config} on every call so that runtime changes + * (system table updates / hot-reload) take effect without a restart. + * Setting a config key to a blank value disables that header entirely, giving operators + * full control over which headers are sent.

+ * + *

Config keys and their defaults: + *

+ *

+ */ + static Map getBrowserHeaders() { + final Map headers = new LinkedHashMap<>(); + final String[][] defs = { + {"User-Agent", Config.getStringProperty("TEMP_FILE_URL_USER_AGENT", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")}, + {"Accept", Config.getStringProperty("TEMP_FILE_URL_ACCEPT", + "*/*")}, + {"Accept-Language", Config.getStringProperty("TEMP_FILE_URL_ACCEPT_LANGUAGE", + "en-US,en;q=0.9")}, + {"Accept-Encoding", Config.getStringProperty("TEMP_FILE_URL_ACCEPT_ENCODING", + "gzip, deflate")}, + {"Connection", Config.getStringProperty("TEMP_FILE_URL_CONNECTION", + "keep-alive")}, + }; + for (final String[] entry : defs) { + if (UtilMethods.isSet(entry[1])) { + headers.put(entry[0], entry[1]); + } + } + return ImmutableMap.copyOf(headers); + } /** @@ -242,6 +287,7 @@ public DotTempFile createTempFileFromUrl(final String incomingFileName, Files.newOutputStream(tempFile.toPath()))){ final CircuitBreakerUrl urlGetter = CircuitBreakerUrl.builder().setMethod(Method.GET).setUrl(finalUrl) + .setHeaders(getBrowserHeaders()) .setTimeout(timeoutSeconds * 1000L).build(); urlGetter.doOut(out); } @@ -270,7 +316,8 @@ public boolean validUrl(final String url) { String done; try { final CircuitBreakerUrl urlGetter = - CircuitBreakerUrl.builder().setMethod(Method.GET).setUrl(url).build(); + CircuitBreakerUrl.builder().setMethod(Method.GET).setUrl(url) + .setHeaders(getBrowserHeaders()).build(); done = urlGetter.doString(); } catch (IOException | BadRequestException e) {//If response is not 200, CircuitBreakerUrl throws BadRequestException return false; diff --git a/dotcms-integration/src/test/java/com/dotcms/rest/api/v1/temp/TempFileAPITest.java b/dotcms-integration/src/test/java/com/dotcms/rest/api/v1/temp/TempFileAPITest.java index a68cb204898d..8e2828f33190 100644 --- a/dotcms-integration/src/test/java/com/dotcms/rest/api/v1/temp/TempFileAPITest.java +++ b/dotcms-integration/src/test/java/com/dotcms/rest/api/v1/temp/TempFileAPITest.java @@ -3,14 +3,18 @@ import static com.dotcms.datagen.TestDataUtils.getFileAssetContent; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import com.dotcms.datagen.TestDataUtils; import com.dotcms.datagen.TestDataUtils.TestFile; +import com.dotcms.http.server.mock.MockHttpServer; +import com.dotcms.http.server.mock.MockHttpServerContext; import com.dotcms.mock.request.MockSession; import com.dotcms.util.IntegrationTestInitService; +import com.dotcms.util.network.IPUtils; import com.dotmarketing.business.APILocator; import com.dotmarketing.exception.DotSecurityException; import com.dotmarketing.util.FileUtil; @@ -18,7 +22,12 @@ import com.liferay.portal.util.WebKeys; import java.io.File; import java.io.IOException; +import java.net.HttpURLConnection; +import java.util.Arrays; +import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; import javax.servlet.http.HttpServletRequest; import org.jetbrains.annotations.NotNull; import org.junit.BeforeClass; @@ -108,5 +117,127 @@ public static HttpServletRequest mockHttpServletRequest() { return request; } + /** + * Method to test: {@link TempFileAPI#getBrowserHeaders()} + * Test scenario: Verify that all required browser-compatible header keys are present + * Expected: The map contains the 5 required header keys (no Sec-Fetch-* headers) + */ + @Test + public void testBrowserHeaders_containsAllRequiredKeys() { + final List requiredHeaders = Arrays.asList( + "User-Agent", + "Accept", + "Accept-Language", + "Accept-Encoding", + "Connection" + ); + + final Map headers = TempFileAPI.getBrowserHeaders(); + assertNotNull("getBrowserHeaders() must not return null", headers); + for (final String header : requiredHeaders) { + assertTrue( + "getBrowserHeaders() must contain header: " + header, + headers.containsKey(header)); + assertNotNull( + "Value for header '" + header + "' must not be null", + headers.get(header)); + assertFalse( + "Value for header '" + header + "' must not be empty", + headers.get(header).isEmpty()); + } + } + + /** + * Method to test: {@link TempFileAPI#getBrowserHeaders()} + * Test scenario: Verify the default values of the browser-compatible headers + * Expected: + * - Accept defaults to {@code *}{@code /*} (generic, not image-specific) + * - Connection defaults to {@code keep-alive} and is configurable + * - Accept-Encoding does not advertise Brotli (br) — Apache HttpClient has no brotli decoder + * - No Sec-Fetch-* headers are present (they are browser-generated metadata, not for server use) + */ + @Test + public void testBrowserHeaders_defaultValues() { + final Map headers = TempFileAPI.getBrowserHeaders(); + + // Connection defaults to keep-alive (configurable via TEMP_FILE_URL_CONNECTION) + assertEquals("keep-alive", headers.get("Connection")); + + // Accept must default to */* — the endpoint downloads any file type, not just images + assertEquals("*/*", headers.get("Accept")); + + // Configurable headers must be present and non-empty (may be overridden via Config) + assertTrue("User-Agent must not be empty", + !headers.get("User-Agent").isEmpty()); + assertTrue("Accept-Language must not be empty", + !headers.get("Accept-Language").isEmpty()); + + // Accept-Encoding must not advertise brotli; Apache HttpClient has no brotli decoder + final String acceptEncoding = headers.get("Accept-Encoding"); + assertTrue("Accept-Encoding must not be empty", !acceptEncoding.isEmpty()); + assertFalse("Accept-Encoding must not advertise brotli (br)", acceptEncoding.contains("br")); + + // Sec-Fetch-* headers must NOT be present — they are browser-generated Fetch Metadata + // headers whose purpose is to let servers verify a request came from a real browser context. + // Sending them from a server-side HTTP client is misleading and may cause rejections. + assertFalse("Sec-Fetch-Dest must not be present", headers.containsKey("Sec-Fetch-Dest")); + assertFalse("Sec-Fetch-Mode must not be present", headers.containsKey("Sec-Fetch-Mode")); + assertFalse("Sec-Fetch-Site must not be present", headers.containsKey("Sec-Fetch-Site")); + } + + /** + * Method to test: {@link TempFileAPI#validUrl(String)} + * Test scenario: Verify that browser-like headers are actually sent in the outbound HTTP request + * Expected: All headers returned by getBrowserHeaders() arrive at the mock server + */ + @Test + public void testValidUrl_sendsBrowserHeaders() { + final String mockIp = "127.0.0.1"; + final int mockPort = 50881; + final String path = "/image.png"; + + // Capture the incoming request headers via an AtomicReference + final AtomicReference capturedHeaders = + new AtomicReference<>(); + + final MockHttpServerContext context = new MockHttpServerContext.Builder() + .uri(path) + .responseStatus(HttpURLConnection.HTTP_OK) + .responseBody("OK") + .requestCondition( + "Capture request headers", + requestCtx -> { + capturedHeaders.set(requestCtx.getHeaders()); + return true; + }) + .mustBeCalled() + .build(); + + final MockHttpServer mockHttpServer = new MockHttpServer(mockIp, mockPort); + mockHttpServer.addContext(context); + mockHttpServer.start(); + IPUtils.disabledIpPrivateSubnet(true); + + try { + final boolean valid = APILocator.getTempFileAPI() + .validUrl("http://" + mockIp + ":" + mockPort + path); + + assertTrue("validUrl should return true for a 200 response", valid); + mockHttpServer.validate(); + + final com.sun.net.httpserver.Headers received = capturedHeaders.get(); + assertNotNull("Request headers must have been captured", received); + + // Every header in getBrowserHeaders() must have been sent to the server + for (final String expectedHeader : TempFileAPI.getBrowserHeaders().keySet()) { + assertTrue( + "Outbound request must include header: " + expectedHeader, + received.containsKey(expectedHeader)); + } + } finally { + mockHttpServer.stop(); + IPUtils.disabledIpPrivateSubnet(false); + } + } }