From 809d01fabda91e7f92efda2cf1e2401dbf122b7e Mon Sep 17 00:00:00 2001 From: Edward Li Date: Wed, 12 Mar 2025 17:47:36 -0700 Subject: [PATCH 1/5] Add FileIO-level `allowed_paths` check --- src/codegen/sdk/codebase/codebase_context.py | 3 ++- src/codegen/sdk/codebase/io/file_io.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index 6e361ef1e..b58f89ede 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -159,7 +159,6 @@ def __init__( # =====[ __init__ attributes ]===== self.projects = projects - self.io = io or FileIO() context = projects[0] self.node_classes = get_node_classes(context.programming_language) self.config = config or CodebaseConfig() @@ -169,6 +168,8 @@ def __init__( self.full_path = os.path.join(self.repo_path, context.base_path) if context.base_path else self.repo_path self.codeowners_parser = context.repo_operator.codeowners_parser self.base_url = context.repo_operator.base_url + # TODO: Fix this to be more robust with multiple projects + self.io = io or FileIO(allowed_paths=[Path(self.repo_path).resolve()]) # =====[ computed attributes ]===== self.transaction_manager = TransactionManager() self._autocommit = AutoCommit(self) diff --git a/src/codegen/sdk/codebase/io/file_io.py b/src/codegen/sdk/codebase/io/file_io.py index a0ced7e41..f59a28851 100644 --- a/src/codegen/sdk/codebase/io/file_io.py +++ b/src/codegen/sdk/codebase/io/file_io.py @@ -11,14 +11,24 @@ class FileIO(IO): """IO implementation that writes files to disk, and tracks pending changes.""" files: dict[Path, bytes] + allowed_paths: list[Path] | None - def __init__(self): + def __init__(self, allowed_paths: list[Path] | None = None): self.files = {} + self.allowed_paths = allowed_paths + + def _verify_path(self, path: Path) -> None: + if self.allowed_paths is not None: + if not any(path.resolve().is_relative_to(p.resolve()) for p in self.allowed_paths): + msg = f"Path {path.resolve()} is not within allowed paths {self.allowed_paths}" + raise BadWriteError(msg) def write_bytes(self, path: Path, content: bytes) -> None: + self._verify_path(path) self.files[path] = content def read_bytes(self, path: Path) -> bytes: + self._verify_path(path) if path in self.files: return self.files[path] else: @@ -26,6 +36,8 @@ def read_bytes(self, path: Path) -> bytes: def save_files(self, files: set[Path] | None = None) -> None: to_save = set(filter(lambda f: f in files, self.files)) if files is not None else self.files.keys() + for path in to_save: + self._verify_path(path) with ThreadPoolExecutor() as exec: exec.map(lambda path: path.write_bytes(self.files[path]), to_save) if files is None: @@ -40,12 +52,15 @@ def check_changes(self) -> None: self.files.clear() def delete_file(self, path: Path) -> None: + self._verify_path(path) self.untrack_file(path) if path.exists(): path.unlink() def untrack_file(self, path: Path) -> None: + self._verify_path(path) self.files.pop(path, None) def file_exists(self, path: Path) -> bool: + self._verify_path(path) return path.exists() From f87976b89015b2276957260bd67d0051ece1fe98 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Wed, 12 Mar 2025 17:55:56 -0700 Subject: [PATCH 2/5] First check for allow_external --- src/codegen/sdk/codebase/codebase_context.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index b58f89ede..73ea7e55b 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -168,8 +168,11 @@ def __init__( self.full_path = os.path.join(self.repo_path, context.base_path) if context.base_path else self.repo_path self.codeowners_parser = context.repo_operator.codeowners_parser self.base_url = context.repo_operator.base_url - # TODO: Fix this to be more robust with multiple projects - self.io = io or FileIO(allowed_paths=[Path(self.repo_path).resolve()]) + if not self.config.allow_external: + # TODO: Fix this to be more robust with multiple projects + self.io = io or FileIO(allowed_paths=[Path(self.repo_path).resolve()]) + else: + self.io = io or FileIO() # =====[ computed attributes ]===== self.transaction_manager = TransactionManager() self._autocommit = AutoCommit(self) From 22c3f7f999b8f44b427af0499d2f9f1e8b9d8ca0 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Wed, 12 Mar 2025 17:56:18 -0700 Subject: [PATCH 3/5] Move config assertions into codebase_context --- src/codegen/sdk/codebase/codebase_context.py | 7 +++++++ src/codegen/sdk/core/codebase.py | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index 73ea7e55b..53ba72040 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -192,6 +192,13 @@ def __init__( logger.warning("WARNING: The codebase is using an unsupported language!") logger.warning("Some features may not work as expected. Advanced static analysis will be disabled but simple file IO will still work.") + # Assert config assertions + # External import resolution must be enabled if syspath is enabled + if self.config.py_resolve_syspath: + if not self.config.allow_external: + msg = "allow_external must be set to True when py_resolve_syspath is enabled" + raise ValueError(msg) + # Build the graph if not self.config.exp_lazy_graph: self.build_graph(context.repo_operator) diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 9d6eaf2fd..c8a729741 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -213,13 +213,6 @@ def __init__( self.ctx = CodebaseContext(projects, config=config, secrets=secrets, io=io, progress=progress) self.console = Console(record=True, soft_wrap=True) - # Assert config assertions - # External import resolution must be enabled if syspath is enabled - if self.ctx.config.py_resolve_syspath: - if not self.ctx.config.allow_external: - msg = "allow_external must be set to True when py_resolve_syspath is enabled" - raise ValueError(msg) - @noapidoc def __str__(self) -> str: return f"" From d6656bd0ec3030a32c922bee6191fc996e48b13f Mon Sep 17 00:00:00 2001 From: Edward Li Date: Thu, 13 Mar 2025 11:35:08 -0700 Subject: [PATCH 4/5] Add exception tests for fileio --- tests/unit/codegen/sdk/io/test_file_io.py | 102 +++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tests/unit/codegen/sdk/io/test_file_io.py b/tests/unit/codegen/sdk/io/test_file_io.py index 6f8148e4f..c976836e8 100644 --- a/tests/unit/codegen/sdk/io/test_file_io.py +++ b/tests/unit/codegen/sdk/io/test_file_io.py @@ -1,6 +1,8 @@ +from pathlib import Path + import pytest -from codegen.sdk.codebase.io.file_io import FileIO +from codegen.sdk.codebase.io.file_io import BadWriteError, FileIO @pytest.fixture @@ -61,3 +63,101 @@ def test_delete_file(file_io, tmp_path): assert not test_file.exists() assert test_file not in file_io.files + + +def test_read_and_write_bounded(file_io, tmp_path): + allowed_dir = tmp_path / "allowed" + file_io.allowed_paths = [allowed_dir] + + allowed_file = allowed_dir / "test.txt" + content = b"test content" + + file_io.write_bytes(allowed_file, content) + assert file_io.read_bytes(allowed_file) == content + + with pytest.raises(BadWriteError) as exc_info: + bad_file = tmp_path / "test.txt" + file_io.write_bytes(bad_file, content) + + assert "is not within allowed paths" in str(exc_info.value) + + with pytest.raises(BadWriteError) as exc_info: + bad_file_2 = allowed_dir / ".." / "test2.txt" + file_io.write_bytes(bad_file_2, content) + + assert "is not within allowed paths" in str(exc_info.value) + + +def test_read_bounded(file_io, tmp_path): + allowed_dir = tmp_path / "allowed" + allowed_dir.mkdir(exist_ok=True) + file_io.allowed_paths = [allowed_dir] + + allowed_file = allowed_dir / "test.txt" + content = b"test content" + allowed_file.write_bytes(content) + + assert file_io.read_bytes(allowed_file) == content + + with pytest.raises(BadWriteError) as exc_info: + bad_file = tmp_path / "test.txt" + bad_file.write_bytes(content) + file_io.read_bytes(bad_file) + + assert "is not within allowed paths" in str(exc_info.value) + + with pytest.raises(BadWriteError) as exc_info: + bad_file_2 = allowed_dir / ".." / "test2.txt" + bad_file_2.write_bytes(content) + file_io.read_bytes(bad_file_2) + + assert "is not within allowed paths" in str(exc_info.value) + + +def test_delete_file_bounded(file_io, tmp_path): + allowed_dir = tmp_path / "allowed" + allowed_dir.mkdir(exist_ok=True) + file_io.allowed_paths = [allowed_dir] + + allowed_file = allowed_dir / "test.txt" + allowed_file.write_bytes(b"test content") + + file_io.delete_file(allowed_file) + + with pytest.raises(BadWriteError) as exc_info: + bad_file = tmp_path / "test.txt" + bad_file.write_bytes(b"test content") + file_io.delete_file(bad_file) + + assert "is not within allowed paths" in str(exc_info.value) + + with pytest.raises(BadWriteError) as exc_info: + bad_file_2 = allowed_dir / ".." / "test2.txt" + bad_file_2.write_bytes(b"test content") + file_io.delete_file(bad_file_2) + + assert "is not within allowed paths" in str(exc_info.value) + +def test_file_exists_bounded(file_io, tmp_path): + allowed_dir = tmp_path / "allowed" + allowed_dir.mkdir(exist_ok=True) + file_io.allowed_paths = [allowed_dir] + + allowed_file = allowed_dir / "test.txt" + allowed_file.write_bytes(b"test content") + + assert file_io.file_exists(allowed_file) + + with pytest.raises(BadWriteError) as exc_info: + bad_file = tmp_path / "test.txt" + bad_file.write_bytes(b"test content") + file_io.file_exists(bad_file) + + assert "is not within allowed paths" in str(exc_info.value) + + with pytest.raises(BadWriteError) as exc_info: + bad_file_2 = allowed_dir / ".." / "test2.txt" + bad_file_2.write_bytes(b"test content") + file_io.file_exists(bad_file_2) + + assert "is not within allowed paths" in str(exc_info.value) From 380cf9a17ffd0054c830faf5d54382a82ecf7424 Mon Sep 17 00:00:00 2001 From: EdwardJXLi <20020059+EdwardJXLi@users.noreply.github.com> Date: Thu, 13 Mar 2025 18:37:10 +0000 Subject: [PATCH 5/5] Automated pre-commit update --- tests/unit/codegen/sdk/io/test_file_io.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/codegen/sdk/io/test_file_io.py b/tests/unit/codegen/sdk/io/test_file_io.py index c976836e8..bb627da5d 100644 --- a/tests/unit/codegen/sdk/io/test_file_io.py +++ b/tests/unit/codegen/sdk/io/test_file_io.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pytest from codegen.sdk.codebase.io.file_io import BadWriteError, FileIO @@ -138,6 +136,7 @@ def test_delete_file_bounded(file_io, tmp_path): assert "is not within allowed paths" in str(exc_info.value) + def test_file_exists_bounded(file_io, tmp_path): allowed_dir = tmp_path / "allowed" allowed_dir.mkdir(exist_ok=True)