Skip to content

Commit 8a137b0

Browse files
authored
Merge pull request #18 from OPPIDA/perf/lazy-loading
2 parents 9639556 + 210daf2 commit 8a137b0

21 files changed

Lines changed: 289 additions & 124 deletions

File tree

Makefile

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@ check: ## Lint, format, and type-check the code
1212
@ruff format
1313
@ty check
1414

15+
# Record CPython's per-module import timings (-X importtime writes them to
# stderr) and open the log in tuna. The commands are joined with ';' rather
# than '||' so the viewer opens whatever exit status the CLI returns.
profile: ## Run profiling
	@python3 -X importtime -m codesectools 1>/dev/null 2>/tmp/import.log; tuna /tmp/import.log
17+
1518
test: ## Run tests in a Docker container
1619
@docker compose build 1>/dev/null
1720
@docker compose run --rm no-sast

codesectools/cli.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,13 @@
44
It dynamically discovers and adds CLI commands from all available SAST tools.
55
"""
66

7-
import importlib.metadata
87
import os
98
from typing import Optional
109

1110
import typer
1211
import typer.core
1312
from click import Choice
1413
from rich import print
15-
from rich.table import Table
1614
from typing_extensions import Annotated
1715

1816
from codesectools.datasets import DATASETS_ALL
@@ -26,6 +24,8 @@
2624

2725
def version_callback(value: bool) -> None:
2826
"""Print the application version and exit."""
27+
import importlib.metadata
28+
2929
if value:
3030
print(importlib.metadata.version("codesectools"))
3131
raise typer.Exit()
@@ -65,6 +65,8 @@ def status(
6565
] = False,
6666
) -> None:
6767
"""Display the availability of SAST tools and datasets."""
68+
from rich.table import Table
69+
6870
if sasts or (not sasts and not datasets):
6971
table = Table(show_lines=True)
7072
table.add_column("SAST", justify="center", no_wrap=True)

codesectools/datasets/BenchmarkJava/dataset.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010
from pathlib import Path
1111
from typing import Self
1212

13-
import git
14-
1513
from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
1614
from codesectools.shared.cwe import CWE, CWEs
1715

@@ -108,7 +106,9 @@ def download_files(self: Self, test: bool = False) -> None:
108106
test: If True, reduce the number of test files for faster testing.
109107
110108
"""
111-
git.Repo.clone_from(
109+
from git import Repo
110+
111+
Repo.clone_from(
112112
"https://github.com/OWASP-Benchmark/BenchmarkJava.git", self.directory
113113
)
114114

codesectools/datasets/JulietTestSuiteC/dataset.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,6 @@
1313
from pathlib import Path
1414
from typing import Self
1515

16-
import requests
17-
from lxml import etree
18-
1916
from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
2017
from codesectools.shared.cwe import CWE, CWEs
2118
from codesectools.utils import CPU_COUNT
@@ -100,6 +97,8 @@ def download_files(self: Self, test: bool = False) -> None:
10097
test: If True, reduce the number of test files for faster testing.
10198
10299
"""
100+
import requests
101+
103102
zip_file = io.BytesIO(
104103
requests.get(
105104
"https://samate.nist.gov/SARD/downloads/test-suites/2017-10-01-juliet-test-suite-for-c-cplusplus-v1-3.zip"
@@ -129,6 +128,8 @@ def load_dataset(self) -> list[TestCode]:
129128
A list of `TestCode` objects representing the dataset.
130129
131130
"""
131+
from lxml import etree
132+
132133
files = []
133134
testcode_dir = self.directory / "C" / "testcases"
134135
testcode_paths = {

codesectools/datasets/__init__.py

Lines changed: 37 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,21 +12,52 @@
1212
"""
1313

1414
import importlib
15+
from typing import Any
1516

1617
from codesectools.datasets.core.dataset import Dataset
1718
from codesectools.utils import DATASETS_DIR
1819

20+
21+
class LazyDatasetLoader:
    """Lazily load a dataset class to avoid premature imports.

    The loader stands in for the dataset class called ``name``: calling the
    loader instantiates that class, and attributes the loader does not define
    itself are forwarded to the class. The module
    ``codesectools.datasets.<name>.dataset`` is imported on first use only.
    """

    # Attributes set by ``__init__`` and read by ``_load``. They are never
    # forwarded to the dataset class: if one of them is missing, the instance
    # was created without ``__init__`` (as ``copy`` and ``pickle`` do) and
    # forwarding would re-enter ``__getattr__`` until ``RecursionError``.
    _INIT_ATTRIBUTES = frozenset({"name", "loaded"})

    def __init__(self, name: str) -> None:
        """Initialize the lazy loader.

        Args:
            name: The name of the dataset to load.

        """
        self.name = name
        self.loaded = False

    def _load(self) -> None:
        """Import the dataset module and class on first access."""
        if self.loaded:
            return

        self.dataset_module = importlib.import_module(
            f"codesectools.datasets.{self.name}.dataset"
        )
        # The module is expected to expose a class named after the dataset.
        self.dataset: type[Dataset] = getattr(self.dataset_module, self.name)

        # Only flag success once everything above worked, so a failed import
        # is retried on the next access.
        self.loaded = True

    def __call__(self, *args: Any, **kwargs: Any) -> Dataset:
        """Create an instance of the loaded dataset class.

        Args:
            *args: Positional arguments forwarded to the dataset class.
            **kwargs: Keyword arguments forwarded to the dataset class.

        Returns:
            The object returned by calling the dataset class.

        """
        self._load()
        return self.dataset(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:  # noqa: ANN401
        """Proxy attribute access to the loaded dataset class.

        Only invoked when normal attribute lookup on the loader fails.

        Args:
            name: The attribute to read from the dataset class.

        Returns:
            The attribute value found on the dataset class.

        Raises:
            AttributeError: If `name` is one of the loader's own
                initialization attributes (the loader is uninitialized), or
                if the dataset class has no such attribute.

        """
        if name in self._INIT_ATTRIBUTES:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        self._load()
        return getattr(self.dataset, name)
54+
1955
DATASETS_ALL = {}
2056
for child in DATASETS_DIR.iterdir():
2157
if child.is_dir():
2258
if list(child.glob("dataset.py")) and child.name != "core":
2359
dataset_name = child.name
2460

25-
dataset_module = importlib.import_module(
26-
f"codesectools.datasets.{dataset_name}.dataset"
27-
)
28-
dataset: Dataset = getattr(dataset_module, dataset_name)
29-
30-
DATASETS_ALL[dataset_name] = dataset
61+
DATASETS_ALL[dataset_name] = LazyDatasetLoader(dataset_name)
3162

3263
DATASETS_ALL = dict(sorted(DATASETS_ALL.items()))

codesectools/datasets/core/dataset.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,9 @@
1212
from pathlib import Path
1313
from typing import TYPE_CHECKING
1414

15-
import git
1615
import humanize
1716
import typer
1817
from rich import print
19-
from rich.panel import Panel
20-
from rich.progress import Progress
2118

2219
from codesectools.utils import USER_CACHE_DIR
2320

@@ -82,6 +79,8 @@ def is_cached(cls) -> bool:
8279

8380
def prompt_license_agreement(self) -> None:
8481
"""Display the dataset's license and prompt the user for agreement."""
82+
from rich.panel import Panel
83+
8584
panel = Panel(
8685
f"""Dataset:\t[b]{self.name}[/b]
8786
License:\t[b]{self.license}[/b]
@@ -122,6 +121,8 @@ def download_dataset(self, test: bool = False) -> None:
122121
test: If True, download a smaller subset of the dataset for testing.
123122
124123
"""
124+
from rich.progress import Progress
125+
125126
self.prompt_license_agreement()
126127
with Progress() as progress:
127128
progress.add_task(f"Downloading [b]{self.name}[/b]...", total=None)
@@ -538,7 +539,9 @@ def save(self, dir: Path) -> None:
538539
dir: The path to the directory where the repository should be cloned.
539540
540541
"""
541-
repo = git.Repo.clone_from(self.url, dir)
542+
from git import Repo
543+
544+
repo = Repo.clone_from(self.url, dir)
542545
repo.git.checkout(self.commit)
543546

544547

codesectools/sasts/__init__.py

Lines changed: 78 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -16,38 +16,90 @@
1616

1717
import importlib
1818

19-
import typer
20-
19+
from codesectools.sasts.core.cli import CLIFactory
2120
from codesectools.sasts.core.sast import SAST, AnalysisResult
21+
from codesectools.sasts.core.sast.properties import SASTProperties
22+
from codesectools.sasts.core.sast.requirements import SASTRequirement
2223
from codesectools.utils import SASTS_DIR
2324

25+
26+
class LazySASTLoader:
    """Lazily load SAST tool components to avoid premature imports.

    Components are read and written by key (``"status"``, ``"missing"``,
    ``"properties"``, ``"sast"``, ``"analysis_result"``, ``"cli_factory"``).
    The tool's ``sast`` and ``cli`` modules are imported, and the SAST class
    instantiated, the first time any key is accessed.
    """

    def __init__(self, name: str) -> None:
        """Initialize the lazy loader.

        Args:
            name: The name of the SAST tool to load.

        """
        self.name = name
        self.loaded = False

    def _load(self) -> None:
        """Import the SAST modules and classes on first access."""
        if self.loaded:
            return

        sast_module = importlib.import_module(
            f"codesectools.sasts.tools.{self.name}.sast"
        )

        self.sast: type[SAST] = getattr(sast_module, f"{self.name}SAST")
        self.sast_instance: SAST = self.sast()
        self.analysis_result: AnalysisResult = getattr(
            sast_module, f"{self.name}AnalysisResult"
        )

        self.cli_module = importlib.import_module(
            f"codesectools.sasts.tools.{self.name}.cli"
        )
        self.cli_factory: CLIFactory = getattr(
            self.cli_module, f"{self.name}CLIFactory"
        )

        # status/missing/properties are read from the instance once, here;
        # later changes on the instance are not reflected in the mapping.
        self._data = {
            "status": self.sast_instance.status,
            "missing": self.sast_instance.missing,
            "properties": self.sast_instance.properties,
            "sast": self.sast,
            "analysis_result": self.analysis_result,
            "cli_factory": self.cli_factory,
        }

        # Only flag success once everything above worked, so a failed import
        # is retried on the next access.
        self.loaded = True

    def __getitem__(
        self, name: str
    ) -> (
        str
        | list[SASTRequirement]
        | SASTProperties
        | SAST
        | AnalysisResult
        | CLIFactory
    ):
        """Provide dictionary-like access to the loaded SAST components.

        Raises:
            KeyError: If `name` is not one of the component keys.

        """
        self._load()
        return self._data[name]

    def __setitem__(
        self,
        name: str,
        value: str
        | list[SASTRequirement]
        | SASTProperties
        | SAST
        | AnalysisResult
        | CLIFactory,
    ) -> None:
        """Provide dictionary-like write access to the loaded SAST components."""
        # Load first so that ``_data`` exists and the write is not overwritten
        # by a later first-time load.
        self._load()
        self._data[name] = value

    def __contains__(self, name: object) -> bool:
        """Report whether `name` is one of the component keys.

        Defined explicitly because, without it, ``in`` falls back to the
        legacy sequence protocol and calls ``__getitem__(0)``, which raises
        ``KeyError`` instead of answering the membership question.
        """
        self._load()
        return name in self._data

    def get(self, name: str, default: object = None) -> object:
        """Return the component stored under `name`, or `default` if absent.

        Args:
            name: The component key to look up.
            default: The value returned when `name` is not a component key.

        Returns:
            The stored component, or `default`.

        """
        self._load()
        return self._data.get(name, default)
98+
2499
SASTS_ALL = {}
25100
for child in (SASTS_DIR / "tools").iterdir():
26101
if child.is_dir():
27102
sast_name = child.name
28-
29-
sast_module = importlib.import_module(
30-
f"codesectools.sasts.tools.{sast_name}.sast"
31-
)
32-
33-
sast: SAST = getattr(sast_module, f"{sast_name}SAST")
34-
sast_instance = sast()
35-
analysis_result: AnalysisResult = getattr(
36-
sast_module, f"{sast_name}AnalysisResult"
37-
)
38-
39-
cli_module = importlib.import_module(
40-
f"codesectools.sasts.tools.{sast_name}.cli"
41-
)
42-
cli_factory: typer.Typer = getattr(cli_module, f"{sast_name}CLIFactory")
43-
44-
SASTS_ALL[sast_name] = {
45-
"status": sast_instance.status,
46-
"missing": sast_instance.missing,
47-
"properties": sast_instance.properties,
48-
"sast": sast,
49-
"analysis_result": analysis_result,
50-
"cli_factory": cli_factory,
51-
}
103+
SASTS_ALL[sast_name] = LazySASTLoader(sast_name)
52104

53105
SASTS_ALL = dict(sorted(SASTS_ALL.items()))

codesectools/sasts/all/cli.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,11 @@
88
import typer
99
from click import Choice
1010
from rich import print
11-
from rich.console import Console
12-
from rich.style import Style
13-
from rich.syntax import Syntax
14-
from rich.table import Table
15-
from rich.text import Text
1611
from typing_extensions import Annotated
1712

1813
from codesectools.datasets import DATASETS_ALL
1914
from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset
2015
from codesectools.sasts import SASTS_ALL
21-
from codesectools.sasts.all.graphics import ProjectGraphics
2216
from codesectools.sasts.all.sast import AllSAST
2317
from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST
2418
from codesectools.utils import group_successive
@@ -37,6 +31,8 @@ def main() -> None:
3731
@cli.command(help="List used SAST tools.")
3832
def info() -> None:
3933
"""Display the status of all SAST tools and their inclusion in AllSAST."""
34+
from rich.table import Table
35+
4036
table = Table(show_lines=True)
4137
table.add_column("SAST", justify="center", no_wrap=True)
4238
table.add_column("Status", justify="center", no_wrap=True)
@@ -156,6 +152,8 @@ def benchmark(
156152
@cli.command(name="list", help="List existing analysis results.")
157153
def list_() -> None:
158154
"""List existing analysis results for projects and datasets."""
155+
from rich.table import Table
156+
159157
table = Table(show_lines=True)
160158
table.add_column("Name", justify="center", no_wrap=True)
161159
table.add_column("Type", justify="center", no_wrap=True)
@@ -218,6 +216,8 @@ def plot(
218216
] = False,
219217
) -> None:
220218
"""Generate and display plots for a project's aggregated analysis results."""
219+
from codesectools.sasts.all.graphics import ProjectGraphics
220+
221221
project_graphics = ProjectGraphics(project_name=project)
222222
project_graphics.export(overwrite=overwrite, show=show, pgf=pgf)
223223

@@ -239,6 +239,12 @@ def report(
239239
] = False,
240240
) -> None:
241241
"""Generate an HTML report for a project's aggregated analysis results."""
242+
from rich.console import Console
243+
from rich.style import Style
244+
from rich.syntax import Syntax
245+
from rich.table import Table
246+
from rich.text import Text
247+
242248
report_dir = all_sast.output_dir / project / "report"
243249
if report_dir.is_dir():
244250
if overwrite:

codesectools/sasts/all/graphics.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,8 @@ def export(self, overwrite: bool, pgf: bool, show: bool) -> None:
9191
fig.savefig(figure_path_pgf, bbox_inches="tight")
9292
print(f"Figure {fig_name} exported to pgf")
9393

94+
plt.close(fig)
95+
9496

9597
## Single project
9698
class ProjectGraphics(Graphics):

0 commit comments

Comments
 (0)