Skip to content

Commit 67c6a01

Browse files
authored
fix: harden user file media serving (baserow#5270)
1 parent 6b4d7f4 commit 67c6a01

22 files changed

Lines changed: 309 additions & 74 deletions

File tree

Caddyfile

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,29 @@
66
{$BASEROW_CADDY_GLOBAL_CONF}
77
}
88

9+
(baserow_media_files) {
10+
handle_path /media/* {
11+
@downloads {
12+
query dl=*
13+
}
14+
header @downloads Content-Disposition "attachment; filename={query.dl}"
15+
header X-Content-Type-Options "nosniff"
16+
header Content-Security-Policy "sandbox; default-src 'none'; script-src 'none'; object-src 'none'; base-uri 'none'"
17+
18+
# Allow to call head from public url to get size
19+
header {
20+
Access-Control-Allow-Origin {$BASEROW_PUBLIC_URL:http://localhost}
21+
Access-Control-Allow-Methods "GET, HEAD, OPTIONS"
22+
Access-Control-Allow-Headers "*"
23+
Access-Control-Expose-Headers "Content-Length, Content-Type"
24+
}
25+
26+
file_server {
27+
root {$MEDIA_ROOT:/baserow/media/}
28+
}
29+
}
30+
}
31+
932
{$BASEROW_CADDY_ADDRESSES} {
1033
tls {
1134
on_demand
@@ -20,30 +43,27 @@
2043
`
2144
}
2245

46+
@is_baserow_media {
47+
path /media/*
48+
expression `
49+
"{$MEDIA_URL:}".startsWith("http://" + {http.request.host} + "/") ||
50+
"{$MEDIA_URL:}".startsWith("https://" + {http.request.host} + "/") ||
51+
"{$MEDIA_URL:}".startsWith("http://" + {http.request.host} + ":") ||
52+
"{$MEDIA_URL:}".startsWith("https://" + {http.request.host} + ":")
53+
`
54+
}
55+
56+
handle @is_baserow_media {
57+
import baserow_media_files
58+
}
59+
2360
handle @is_baserow_tool {
2461
@backend_routes path /api/* /ws/* /mcp/* /assistant/* {$BASEROW_CADDY_BACKEND_EXTRA_ROUTES:}
2562
handle @backend_routes {
2663
reverse_proxy {$PRIVATE_BACKEND_URL:localhost:8000}
2764
}
2865

29-
handle_path /media/* {
30-
@downloads {
31-
query dl=*
32-
}
33-
header @downloads Content-disposition "attachment; filename={query.dl}"
34-
35-
# Allow to call head from public url to get size
36-
header {
37-
Access-Control-Allow-Origin {$BASEROW_PUBLIC_URL:http://localhost}
38-
Access-Control-Allow-Methods "GET, HEAD, OPTIONS"
39-
Access-Control-Allow-Headers "*"
40-
Access-Control-Expose-Headers "Content-Length, Content-Type"
41-
}
42-
43-
file_server {
44-
root {$MEDIA_ROOT:/baserow/media/}
45-
}
46-
}
66+
import baserow_media_files
4767

4868
handle_path /static/* {
4969
file_server {

Caddyfile.dev

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
@downloads {
1010
query dl=*
1111
}
12-
header @downloads Content-disposition "attachment; filename={query.dl}"
12+
header @downloads Content-Disposition "attachment; filename={query.dl}"
13+
header X-Content-Type-Options "nosniff"
14+
header Content-Security-Policy "sandbox; default-src 'none'; script-src 'none'; object-src 'none'; base-uri 'none'"
1315

1416
header {
1517
Access-Control-Allow-Origin {$PUBLIC_WEB_FRONTEND_URL:http://localhost:3000}

backend/src/baserow/api/user_files/views.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from baserow.api.schemas import get_error_schema
1111
from baserow.contrib.database.api.tokens.authentications import TokenAuthentication
1212
from baserow.core.user_files.exceptions import (
13+
ActiveContentBlockedUserFileError,
1314
FileSizeTooLargeError,
1415
FileURLCouldNotBeReached,
1516
InvalidFileStreamError,
@@ -49,6 +50,7 @@ class UploadFileView(APIView):
4950
{
5051
InvalidFileStreamError: ERROR_INVALID_FILE,
5152
FileSizeTooLargeError: ERROR_FILE_SIZE_TOO_LARGE,
53+
ActiveContentBlockedUserFileError: ERROR_INVALID_FILE,
5254
}
5355
)
5456
def post(self, request):
@@ -93,6 +95,7 @@ class UploadViaURLView(APIView):
9395
FileSizeTooLargeError: ERROR_FILE_SIZE_TOO_LARGE,
9496
FileURLCouldNotBeReached: ERROR_FILE_URL_COULD_NOT_BE_REACHED,
9597
InvalidFileURLError: ERROR_INVALID_FILE_URL,
98+
ActiveContentBlockedUserFileError: ERROR_INVALID_FILE,
9699
}
97100
)
98101
@validate_body(UserFileUploadViaURLRequestSerializer)

backend/src/baserow/config/settings/base.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,15 @@
611611
Decimal(os.getenv("BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB", 1024 * 1024)) * 1024 * 1024
612612
) # ~1TB by default
613613

614+
FILE_UPLOAD_ACTIVE_CONTENT_POLICY = os.getenv(
615+
"BASEROW_FILE_UPLOAD_ACTIVE_CONTENT_POLICY", "download"
616+
).lower()
617+
if FILE_UPLOAD_ACTIVE_CONTENT_POLICY not in ("download", "block"):
618+
raise ImproperlyConfigured(
619+
"BASEROW_FILE_UPLOAD_ACTIVE_CONTENT_POLICY must be set to "
620+
"'download' or 'block'."
621+
)
622+
614623
BASEROW_OPENAI_UPLOADED_FILE_SIZE_LIMIT_MB = int(
615624
os.getenv("BASEROW_OPENAI_UPLOADED_FILE_SIZE_LIMIT_MB", 512)
616625
)
@@ -778,15 +787,22 @@ def __setitem__(self, key, value):
778787
if not BASEROW_EMBEDDED_SHARE_URL:
779788
BASEROW_EMBEDDED_SHARE_URL = PUBLIC_WEB_FRONTEND_URL
780789

790+
MEDIA_URL_PATH = "/media/"
791+
MEDIA_URL = os.getenv("MEDIA_URL", urljoin(PUBLIC_BACKEND_URL, MEDIA_URL_PATH))
792+
781793
PRIVATE_BACKEND_URL = os.getenv("PRIVATE_BACKEND_URL", "http://backend:8000")
782794
PUBLIC_BACKEND_HOSTNAME = urlparse(PUBLIC_BACKEND_URL).hostname
783795
PUBLIC_WEB_FRONTEND_HOSTNAME = urlparse(PUBLIC_WEB_FRONTEND_URL).hostname
784796
BASEROW_EMBEDDED_SHARE_HOSTNAME = urlparse(BASEROW_EMBEDDED_SHARE_URL).hostname
797+
MEDIA_URL_HOSTNAME = urlparse(MEDIA_URL).hostname
785798
PRIVATE_BACKEND_HOSTNAME = urlparse(PRIVATE_BACKEND_URL).hostname
786799

787800
if PUBLIC_BACKEND_HOSTNAME:
788801
ALLOWED_HOSTS.append(PUBLIC_BACKEND_HOSTNAME)
789802

803+
if MEDIA_URL_HOSTNAME:
804+
ALLOWED_HOSTS.append(MEDIA_URL_HOSTNAME)
805+
790806
if PRIVATE_BACKEND_HOSTNAME:
791807
ALLOWED_HOSTS.append(PRIVATE_BACKEND_HOSTNAME)
792808

@@ -951,8 +967,6 @@ def __setitem__(self, key, value):
951967
os.getenv("BASEROW_INITIAL_CREATE_SYNC_TABLE_DATA_LIMIT", 5000)
952968
)
953969

954-
MEDIA_URL_PATH = "/media/"
955-
MEDIA_URL = os.getenv("MEDIA_URL", urljoin(PUBLIC_BACKEND_URL, MEDIA_URL_PATH))
956970
MEDIA_ROOT = os.getenv("MEDIA_ROOT", "/baserow/media")
957971

958972
# Indicates the directory where the user files and user thumbnails are stored.

backend/src/baserow/contrib/builder/api/domains/views.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,8 @@ def get(self, request):
401401
+ settings.EXTRA_PUBLIC_BACKEND_HOSTNAMES
402402
+ settings.EXTRA_PUBLIC_WEB_FRONTEND_HOSTNAMES
403403
)
404+
if settings.MEDIA_URL_HOSTNAME:
405+
allowed_domain.add(settings.MEDIA_URL_HOSTNAME)
404406

405407
if domain_name in allowed_domain:
406408
return Response(None, status=200)

backend/src/baserow/core/user_files/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ def __init__(self, max_size_bytes, *args, **kwargs):
1515
super().__init__(*args, **kwargs)
1616

1717

18+
class ActiveContentBlockedUserFileError(Exception):
19+
"""Raised when the file upload active content policy blocks a file."""
20+
21+
1822
class FileURLCouldNotBeReached(Exception):
1923
"""
2024
Raised when the provided URL could not be reached or points to an internal

backend/src/baserow/core/user_files/handler.py

Lines changed: 101 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from baserow.core.utils import random_string, sha256_hash, stream_size, truncate_middle
3232

3333
from .exceptions import (
34+
ActiveContentBlockedUserFileError,
3435
FileSizeTooLargeError,
3536
FileURLCouldNotBeReached,
3637
InvalidFileStreamError,
@@ -43,9 +44,66 @@
4344
from PIL import Image
4445

4546
MIME_TYPE_UNKNOWN = "application/octet-stream"
47+
ACTIVE_CONTENT_EXTENSIONS = {"html", "htm", "xhtml", "xml", "svg", "svgz"}
48+
ACTIVE_CONTENT_MIME_TYPES = {
49+
"application/xhtml+xml",
50+
"application/xml",
51+
"image/svg+xml",
52+
"text/html",
53+
"text/xml",
54+
}
4655

4756

4857
class UserFileHandler:
58+
def _is_active_content_extension(self, extension: str) -> bool:
59+
return extension.lower() in ACTIVE_CONTENT_EXTENSIONS
60+
61+
def _is_active_content_mime_type(self, mime_type: str) -> bool:
62+
return mime_type.lower() in ACTIVE_CONTENT_MIME_TYPES
63+
64+
def _resolve_mime_type_and_active_content(
65+
self, file_name: str, extension: str, stream
66+
) -> tuple[str, bool]:
67+
"""
68+
Resolves the MIME type for an uploaded file and decides whether it should be
69+
treated as active content.
70+
71+
Both the filename-derived MIME type and the client-supplied `content_type` are
72+
checked independently against the active-content blocklist so that a malicious
73+
client cannot bypass the gate by pairing an innocuous extension with a dangerous
74+
Content-Type header (or vice versa).
75+
76+
:param file_name: The name of the uploaded file.
77+
:param extension: The file extension of the uploaded file.
78+
:param stream: The file content stream of the uploaded file, which may have a
79+
`content_type` attribute.
80+
:return: A tuple of the resolved MIME type and whether the file is considered
81+
active content.
82+
"""
83+
84+
guessed_mime_type = mimetypes.guess_type(file_name)[0]
85+
uploaded_mime_type = getattr(stream, "content_type", None)
86+
mime_type = guessed_mime_type or uploaded_mime_type or MIME_TYPE_UNKNOWN
87+
is_active_content = (
88+
self._is_active_content_extension(extension)
89+
or (
90+
guessed_mime_type is not None
91+
and self._is_active_content_mime_type(guessed_mime_type)
92+
)
93+
or (
94+
uploaded_mime_type is not None
95+
and self._is_active_content_mime_type(uploaded_mime_type)
96+
)
97+
)
98+
return mime_type, is_active_content
99+
100+
def _neutralize_active_content(self, user_file: UserFile) -> UserFile:
101+
user_file.mime_type = MIME_TYPE_UNKNOWN
102+
user_file.is_image = False
103+
user_file.image_width = None
104+
user_file.image_height = None
105+
return user_file
106+
49107
def is_user_file_name(self, user_file_name: str) -> bool:
50108
"""
51109
Checks if the given name is a user file name.
@@ -266,6 +324,15 @@ def upload_user_file(self, user, file_name, stream, storage=None):
266324
storage = storage or get_default_storage()
267325
stream_hash = sha256_hash(stream)
268326
file_name = truncate_middle(file_name, 64)
327+
extension = pathlib.Path(file_name).suffix[1:].lower()
328+
mime_type, is_active_content = self._resolve_mime_type_and_active_content(
329+
file_name, extension, stream
330+
)
331+
332+
if is_active_content and settings.FILE_UPLOAD_ACTIVE_CONTENT_POLICY == "block":
333+
raise ActiveContentBlockedUserFileError(
334+
"The provided file type is not allowed."
335+
)
269336

270337
existing_user_file = UserFile.objects.filter(
271338
original_name=file_name,
@@ -274,14 +341,18 @@ def upload_user_file(self, user, file_name, stream, storage=None):
274341
).first()
275342

276343
if existing_user_file:
344+
if is_active_content:
345+
self._neutralize_active_content(existing_user_file)
346+
existing_user_file.save(
347+
update_fields=[
348+
"mime_type",
349+
"is_image",
350+
"image_width",
351+
"image_height",
352+
]
353+
)
277354
return existing_user_file
278355

279-
extension = pathlib.Path(file_name).suffix[1:].lower()
280-
mime_type = (
281-
mimetypes.guess_type(file_name)[0]
282-
or getattr(stream, "content_type", None)
283-
or MIME_TYPE_UNKNOWN
284-
)
285356
unique = self.generate_unique(stream_hash, extension)
286357
user_file = UserFile(
287358
original_name=file_name,
@@ -293,26 +364,30 @@ def upload_user_file(self, user, file_name, stream, storage=None):
293364
sha256_hash=stream_hash,
294365
)
295366

296-
image = None
297-
try:
298-
image = Image.open(stream)
299-
user_file.mime_type = f"image/{image.format}".lower()
300-
self.generate_and_save_image_thumbnails(
301-
image, user_file.name, storage=storage
302-
)
303-
# Skip marking as images if thumbnails cannot be generated (i.e. PSD files).
304-
user_file.is_image = True
305-
user_file.image_width = image.width
306-
user_file.image_height = image.height
307-
except IOError:
308-
pass # Not an image
309-
except Exception as exc:
310-
logger.warning(
311-
f"Failed to generate thumbnails for user file of type {mime_type}: {exc}"
312-
)
313-
finally:
314-
if image is not None:
315-
del image
367+
if is_active_content:
368+
self._neutralize_active_content(user_file)
369+
else:
370+
image = None
371+
try:
372+
image = Image.open(stream)
373+
user_file.mime_type = f"image/{image.format}".lower()
374+
self.generate_and_save_image_thumbnails(
375+
image, user_file.name, storage=storage
376+
)
377+
# Skip marking as images if thumbnails cannot be generated (i.e. PSD files).
378+
user_file.is_image = True
379+
user_file.image_width = image.width
380+
user_file.image_height = image.height
381+
except IOError:
382+
pass # Not an image
383+
except Exception as exc:
384+
logger.warning(
385+
f"Failed to generate thumbnails for user file of type "
386+
f"{mime_type}: {exc}"
387+
)
388+
finally:
389+
if image is not None:
390+
del image
316391

317392
user_file.save()
318393

backend/tests/baserow/api/user_files/test_user_files_views.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from django.core.files.storage import FileSystemStorage
55
from django.core.files.uploadedfile import SimpleUploadedFile
66
from django.shortcuts import reverse
7+
from django.test import override_settings
78

89
import pytest
910
import responses
@@ -19,6 +20,22 @@
1920
from baserow.core.models import UserFile
2021

2122

23+
@pytest.mark.django_db
24+
@override_settings(FILE_UPLOAD_ACTIVE_CONTENT_POLICY="block")
25+
def test_upload_file_active_content_blocked(api_client, data_fixture):
26+
user, token = data_fixture.create_user_and_token()
27+
28+
response = api_client.post(
29+
reverse("api:user_files:upload_file"),
30+
data={"file": SimpleUploadedFile("index.html", b"<script></script>")},
31+
format="multipart",
32+
HTTP_AUTHORIZATION=f"JWT {token}",
33+
)
34+
35+
assert response.status_code == HTTP_400_BAD_REQUEST
36+
assert response.json()["error"] == "ERROR_INVALID_FILE"
37+
38+
2239
@pytest.mark.django_db
2340
def test_upload_file_with_jwt_auth(api_client, data_fixture, tmpdir):
2441
user, token = data_fixture.create_user_and_token(

backend/tests/baserow/contrib/builder/api/domains/test_domain_public_views.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,7 @@ def test_ask_public_builder_domain_exists(api_client, data_fixture):
646646
@override_settings(
647647
PUBLIC_BACKEND_HOSTNAME="backend.localhost",
648648
PUBLIC_WEB_FRONTEND_HOSTNAME="web-frontend.localhost",
649+
MEDIA_URL_HOSTNAME="media.localhost",
649650
)
650651
def test_ask_public_builder_domain_exists_with_public_backend_and_web_frontend_domains(
651652
api_client, data_fixture
@@ -662,6 +663,10 @@ def test_ask_public_builder_domain_exists_with_public_backend_and_web_frontend_d
662663
response = api_client.get(url)
663664
assert response.status_code == 200
664665

666+
url = reverse("api:builder:domains:ask_exists") + "?domain=media.localhost"
667+
response = api_client.get(url)
668+
assert response.status_code == 200
669+
665670

666671
@pytest.mark.django_db
667672
@override_settings(

0 commit comments

Comments
 (0)