Skip to content

Commit 558e62c

Browse files
authored
Cache: Write data map file from rust (#259)
Write data map file from rust
1 parent 558fc35 commit 558e62c

10 files changed

Lines changed: 181 additions & 56 deletions

File tree

rust/src/caching.rs

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,34 @@ use crate::errors::{GrimpError, GrimpResult};
22
use crate::filesystem::get_file_system_boxed;
33
use crate::import_scanning::{DirectImport, imports_by_module_to_py};
44
use crate::module_finding::Module;
5-
use pyo3::types::PyDict;
6-
use pyo3::{Bound, PyAny, PyResult, Python, pyfunction};
5+
use pyo3::types::PyAnyMethods;
6+
use pyo3::types::{PyDict, PySet};
7+
use pyo3::types::{PyDictMethods, PySetMethods};
8+
use pyo3::{Bound, FromPyObject, PyAny, PyResult, Python, pyfunction};
79
use std::collections::{HashMap, HashSet};
810

11+
/// Writes the cache file containing all the imports for a given package.
12+
/// Args:
13+
/// - filename: str
14+
/// - imports_by_module: dict[Module, Set[DirectImport]]
15+
/// - file_system: The file system interface to use. (A BasicFileSystem.)
16+
#[pyfunction]
17+
pub fn write_cache_data_map_file<'py>(
18+
filename: &str,
19+
imports_by_module: Bound<'py, PyDict>,
20+
file_system: Bound<'py, PyAny>,
21+
) -> PyResult<()> {
22+
let mut file_system_boxed = get_file_system_boxed(&file_system)?;
23+
24+
let ImportsByModule(imports_by_module_rust) = imports_by_module.extract()?;
25+
26+
let file_contents = serialize_imports_by_module(&imports_by_module_rust);
27+
28+
file_system_boxed.write(filename, &file_contents)?;
29+
30+
Ok(())
31+
}
32+
933
/// Reads the cache file containing all the imports for a given package.
1034
/// Args:
1135
/// - filename: str
@@ -26,6 +50,52 @@ pub fn read_cache_data_map_file<'py>(
2650
Ok(imports_by_module_to_py(py, imports_by_module))
2751
}
2852

53+
/// A newtype wrapper for HashMap<Module, HashSet<DirectImport>> that implements FromPyObject.
54+
pub struct ImportsByModule(pub HashMap<Module, HashSet<DirectImport>>);
55+
56+
impl<'py> FromPyObject<'py> for ImportsByModule {
57+
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
58+
let py_dict = ob.downcast::<PyDict>()?;
59+
let mut imports_by_module_rust = HashMap::new();
60+
61+
for (py_key, py_value) in py_dict.iter() {
62+
let module: Module = py_key.extract()?;
63+
let py_set = py_value.downcast::<PySet>()?;
64+
let mut hashset: HashSet<DirectImport> = HashSet::new();
65+
for element in py_set.iter() {
66+
let direct_import: DirectImport = element.extract()?;
67+
hashset.insert(direct_import);
68+
}
69+
imports_by_module_rust.insert(module, hashset);
70+
}
71+
72+
Ok(ImportsByModule(imports_by_module_rust))
73+
}
74+
}
75+
76+
fn serialize_imports_by_module(
77+
imports_by_module: &HashMap<Module, HashSet<DirectImport>>,
78+
) -> String {
79+
let raw_map: HashMap<&str, Vec<(&str, usize, &str)>> = imports_by_module
80+
.iter()
81+
.map(|(module, imports)| {
82+
let imports_vec: Vec<(&str, usize, &str)> = imports
83+
.iter()
84+
.map(|import| {
85+
(
86+
import.imported.as_str(),
87+
import.line_number,
88+
import.line_contents.as_str(),
89+
)
90+
})
91+
.collect();
92+
(module.name.as_str(), imports_vec)
93+
})
94+
.collect();
95+
96+
serde_json::to_string(&raw_map).expect("Failed to serialize to JSON")
97+
}
98+
2999
pub fn parse_json_to_map(
30100
json_str: &str,
31101
filename: &str,

rust/src/filesystem.rs

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ use regex::Regex;
55
use std::collections::HashMap;
66
use std::ffi::OsStr;
77
use std::fs;
8+
use std::fs::File;
9+
use std::io::prelude::*;
810
use std::path::{Path, PathBuf};
9-
use std::sync::LazyLock;
11+
use std::sync::{Arc, LazyLock, Mutex};
1012
use unindent::unindent;
1113

1214
static ENCODING_RE: LazyLock<Regex> =
@@ -22,17 +24,19 @@ pub trait FileSystem: Send + Sync {
2224
fn exists(&self, file_name: &str) -> bool;
2325

2426
fn read(&self, file_name: &str) -> PyResult<String>;
27+
28+
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()>;
2529
}
2630

2731
#[derive(Clone)]
2832
#[pyclass]
29-
pub struct RealBasicFileSystem {}
33+
struct RealBasicFileSystem {}
3034

3135
// Implements a BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem)
3236
// that actually reads and writes files.
3337
#[pyclass(name = "RealBasicFileSystem")]
3438
pub struct PyRealBasicFileSystem {
35-
pub inner: RealBasicFileSystem,
39+
inner: RealBasicFileSystem,
3640
}
3741

3842
impl FileSystem for RealBasicFileSystem {
@@ -129,6 +133,16 @@ impl FileSystem for RealBasicFileSystem {
129133
})
130134
}
131135
}
136+
137+
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
138+
let file_path: PathBuf = file_name.into();
139+
if let Some(patent_dir) = file_path.parent() {
140+
fs::create_dir_all(patent_dir)?;
141+
}
142+
File::create(file_path)?
143+
.write_all(contents.as_bytes())
144+
.map_err(Into::into)
145+
}
132146
}
133147

134148
#[pymethods]
@@ -161,19 +175,23 @@ impl PyRealBasicFileSystem {
161175
// Returns the contents of `file_name` by delegating to the wrapped
// `RealBasicFileSystem`.
fn read(&self, file_name: &str) -> PyResult<String> {
162176
self.inner.read(file_name)
163177
}
178+
179+
// Writes `contents` to `file_name` by delegating to the wrapped
// `RealBasicFileSystem`.
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
180+
self.inner.write(file_name, contents)
181+
}
164182
}
165183

166184
type FileSystemContents = HashMap<String, String>;
167185

168186
#[derive(Clone)]
169-
pub struct FakeBasicFileSystem {
170-
contents: Box<FileSystemContents>,
187+
struct FakeBasicFileSystem {
188+
contents: Arc<Mutex<FileSystemContents>>,
171189
}
172190

173191
// Implements BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem).
174192
#[pyclass(name = "FakeBasicFileSystem")]
175193
pub struct PyFakeBasicFileSystem {
176-
pub inner: FakeBasicFileSystem,
194+
inner: FakeBasicFileSystem,
177195
}
178196

179197
impl FakeBasicFileSystem {
@@ -190,7 +208,7 @@ impl FakeBasicFileSystem {
190208
parsed_contents.extend(unindented_map);
191209
};
192210
Ok(FakeBasicFileSystem {
193-
contents: Box::new(parsed_contents),
211+
contents: Arc::new(Mutex::new(parsed_contents)),
194212
})
195213
}
196214
}
@@ -232,17 +250,25 @@ impl FileSystem for FakeBasicFileSystem {
232250

233251
/// Checks if a file or directory exists within the file system.
234252
fn exists(&self, file_name: &str) -> bool {
235-
self.contents.contains_key(file_name)
253+
self.contents.lock().unwrap().contains_key(file_name)
236254
}
237255

238256
fn read(&self, file_name: &str) -> PyResult<String> {
239-
match self.contents.get(file_name) {
240-
Some(file_name) => Ok(file_name.clone()),
257+
let contents = self.contents.lock().unwrap();
258+
match contents.get(file_name) {
259+
Some(file_contents) => Ok(file_contents.clone()),
241260
None => Err(PyFileNotFoundError::new_err(format!(
242261
"No such file: {file_name}"
243262
))),
244263
}
245264
}
265+
266+
#[allow(unused_variables)]
267+
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
268+
let mut contents_mut = self.contents.lock().unwrap();
269+
contents_mut.insert(file_name.to_string(), contents.to_string());
270+
Ok(())
271+
}
246272
}
247273

248274
#[pymethods]
@@ -278,6 +304,10 @@ impl PyFakeBasicFileSystem {
278304
self.inner.read(file_name)
279305
}
280306

307+
// Writes `contents` to `file_name` by delegating to the wrapped
// `FakeBasicFileSystem`.
fn write(&mut self, file_name: &str, contents: &str) -> PyResult<()> {
308+
self.inner.write(file_name, contents)
309+
}
310+
281311
// Temporary workaround method for Python tests.
282312
fn convert_to_basic(&self) -> PyResult<Self> {
283313
Ok(PyFakeBasicFileSystem {
@@ -289,7 +319,7 @@ impl PyFakeBasicFileSystem {
289319
/// Parses an indented string representing a file system structure
290320
/// into a HashMap where keys are full file paths.
291321
/// See tests.adaptors.filesystem.FakeFileSystem for the API.
292-
pub fn parse_indented_file_system_string(file_system_string: &str) -> HashMap<String, String> {
322+
fn parse_indented_file_system_string(file_system_string: &str) -> HashMap<String, String> {
293323
let mut file_paths_map: HashMap<String, String> = HashMap::new();
294324
let mut path_stack: Vec<String> = Vec::new(); // Stores current directory path components
295325
let mut first_line = true; // Flag to handle the very first path component
@@ -381,7 +411,6 @@ pub fn get_file_system_boxed<'py>(
381411
file_system: &Bound<'py, PyAny>,
382412
) -> PyResult<Box<dyn FileSystem + Send + Sync>> {
383413
let file_system_boxed: Box<dyn FileSystem + Send + Sync>;
384-
385414
if let Ok(py_real) = file_system.extract::<PyRef<PyRealBasicFileSystem>>() {
386415
file_system_boxed = Box::new(py_real.inner.clone());
387416
} else if let Ok(py_fake) = file_system.extract::<PyRef<PyFakeBasicFileSystem>>() {
@@ -391,5 +420,6 @@ pub fn get_file_system_boxed<'py>(
391420
"file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem",
392421
));
393422
}
423+
394424
Ok(file_system_boxed)
395425
}

rust/src/import_scanning.rs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,23 @@ pub struct DirectImport {
1818
pub line_contents: String,
1919
}
2020

21-
pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet<FoundPackage> {
21+
impl<'py> FromPyObject<'py> for DirectImport {
22+
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
23+
let importer: String = ob.getattr("importer")?.getattr("name")?.extract()?;
24+
let imported: String = ob.getattr("imported")?.getattr("name")?.extract()?;
25+
let line_number: usize = ob.getattr("line_number")?.extract()?;
26+
let line_contents: String = ob.getattr("line_contents")?.extract()?;
27+
28+
Ok(DirectImport {
29+
importer,
30+
imported,
31+
line_number,
32+
line_contents,
33+
})
34+
}
35+
}
36+
37+
fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet<FoundPackage> {
2238
let py_set = py_found_packages
2339
.downcast::<PySet>()
2440
.expect("Expected py_found_packages to be a Python set.");
@@ -36,7 +52,7 @@ pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSe
3652
rust_found_packages
3753
}
3854

39-
pub fn get_modules_from_found_packages(found_packages: &HashSet<FoundPackage>) -> HashSet<Module> {
55+
fn get_modules_from_found_packages(found_packages: &HashSet<FoundPackage>) -> HashSet<Module> {
4056
let mut modules = HashSet::new();
4157
for package in found_packages {
4258
for module_file in &package.module_files {
@@ -57,7 +73,7 @@ fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool {
5773
/// Statically analyses the given module and returns a set of Modules that
5874
/// it imports.
5975
#[allow(clippy::borrowed_box)]
60-
pub fn scan_for_imports_no_py(
76+
fn scan_for_imports_no_py(
6177
file_system: &Box<dyn FileSystem + Send + Sync>,
6278
found_packages: &HashSet<FoundPackage>,
6379
include_external_packages: bool,
@@ -153,7 +169,7 @@ fn scan_for_imports_no_py_single_module(
153169
Ok(imports)
154170
}
155171

156-
pub fn to_py_direct_imports<'a>(
172+
fn to_py_direct_imports<'a>(
157173
py: Python<'a>,
158174
rust_imports: &HashSet<DirectImport>,
159175
) -> Bound<'a, PySet> {

rust/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ mod _rustgrimp {
1818
#[pymodule_export]
1919
use crate::caching::read_cache_data_map_file;
2020

21+
#[pymodule_export]
22+
use crate::caching::write_cache_data_map_file;
23+
2124
#[pymodule_export]
2225
use crate::graph::GraphWrapper;
2326

src/grimp/adaptors/caching.py

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55
from typing import Optional
66

7-
from grimp.application.ports.filesystem import AbstractFileSystem
7+
from grimp.application.ports.filesystem import BasicFileSystem
88
from grimp.application.ports.modulefinder import FoundPackage, ModuleFile
99
from grimp.domain.valueobjects import DirectImport, Module
1010

@@ -77,7 +77,7 @@ def __init__(self, *args, namer: type[CacheFileNamer], **kwargs) -> None:
7777
@classmethod
7878
def setup(
7979
cls,
80-
file_system: AbstractFileSystem,
80+
file_system: BasicFileSystem,
8181
found_packages: set[FoundPackage],
8282
include_external_packages: bool,
8383
exclude_type_checking_imports: bool = False,
@@ -122,22 +122,6 @@ def write(
122122
) -> None:
123123
self._write_marker_files_if_not_already_there()
124124
# Write data file.
125-
primitives_map: PrimitiveFormat = {}
126-
for found_package in self.found_packages:
127-
primitives_map_for_found_package: PrimitiveFormat = {
128-
module_file.module.name: [
129-
(
130-
direct_import.imported.name,
131-
direct_import.line_number,
132-
direct_import.line_contents,
133-
)
134-
for direct_import in imports_by_module[module_file.module]
135-
]
136-
for module_file in found_package.module_files
137-
}
138-
primitives_map.update(primitives_map_for_found_package)
139-
140-
serialized = json.dumps(primitives_map)
141125
data_cache_filename = self.file_system.join(
142126
self.cache_dir,
143127
self._namer.make_data_file_name(
@@ -146,7 +130,12 @@ def write(
146130
exclude_type_checking_imports=self.exclude_type_checking_imports,
147131
),
148132
)
149-
self.file_system.write(data_cache_filename, serialized)
133+
rust.write_cache_data_map_file(
134+
filename=data_cache_filename,
135+
imports_by_module=imports_by_module,
136+
file_system=self.file_system,
137+
)
138+
150139
logger.info(f"Wrote data cache file {data_cache_filename}.")
151140

152141
# Write meta files.
@@ -202,7 +191,7 @@ def _read_data_map_file(self) -> dict[Module, set[DirectImport]]:
202191
)
203192
try:
204193
imports_by_module = rust.read_cache_data_map_file(
205-
data_cache_filename, self.file_system.convert_to_basic()
194+
data_cache_filename, self.file_system
206195
)
207196
except FileNotFoundError:
208197
logger.info(f"No cache file: {data_cache_filename}.")

src/grimp/application/ports/caching.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from grimp.application.ports.modulefinder import FoundPackage, ModuleFile
22
from grimp.domain.valueobjects import DirectImport, Module
33

4-
from .filesystem import AbstractFileSystem
4+
from .filesystem import BasicFileSystem
55

66

77
class CacheMiss(Exception):
@@ -11,7 +11,7 @@ class CacheMiss(Exception):
1111
class Cache:
1212
def __init__(
1313
self,
14-
file_system: AbstractFileSystem,
14+
file_system: BasicFileSystem,
1515
include_external_packages: bool,
1616
exclude_type_checking_imports: bool,
1717
found_packages: set[FoundPackage],
@@ -29,7 +29,7 @@ def __init__(
2929
@classmethod
3030
def setup(
3131
cls,
32-
file_system: AbstractFileSystem,
32+
file_system: BasicFileSystem,
3333
found_packages: set[FoundPackage],
3434
*,
3535
include_external_packages: bool,

0 commit comments

Comments
 (0)