diff --git a/misc/diff-cache.py b/misc/diff-cache.py index 7e33e17fd909..1f1b2a39c4bb 100644 --- a/misc/diff-cache.py +++ b/misc/diff-cache.py @@ -115,7 +115,7 @@ def load(cache: MetadataStore, s: str) -> Any: return data normalize_meta(meta) return serialize_meta_ff(meta, version_prefix) - if s.endswith(".data.ff"): + if s.endswith((".data.ff", ".err.ff")): return data obj = json_loads(data) if s.endswith(".meta.json"): diff --git a/mypy/build.py b/mypy/build.py index 4fe6f52f5828..f079d3a63663 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -69,6 +69,7 @@ Tag, WriteBuffer, read_bytes, + read_errors, read_int, read_int_list, read_int_opt, @@ -76,6 +77,7 @@ read_str_list, read_str_opt, write_bytes, + write_errors, write_int, write_int_list, write_int_opt, @@ -1626,6 +1628,13 @@ def create_metastore(options: Options, parallel_worker: bool = False) -> Metadat return mds +def get_errors_name(meta_name: str) -> str: + # Convert e.g. foo.bar.meta.ff to foo.bar.err.ff + parts = meta_name.rsplit(".", maxsplit=2) + parts[1] = "err" + return ".".join(parts) + + def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str | None]: """Return the file names for the cache files. @@ -1688,7 +1697,7 @@ def options_snapshot(id: str, manager: BuildManager) -> dict[str, object]: def find_cache_meta( id: str, path: str, manager: BuildManager, skip_validation: bool = False -) -> CacheMeta | None: +) -> tuple[CacheMeta | None, list[ErrorTuple]]: """Find cache data for a module. 
Args: @@ -1705,15 +1714,12 @@ def find_cache_meta( meta_file, data_file, _ = get_cache_names(id, path, manager.options) if manager.tracing_enabled: manager.trace(f"Looking for {id} at {meta_file}") - meta: bytes | dict[str, Any] | None if manager.stats_enabled: t0 = time.time() if manager.options.fixed_format_cache: meta = _load_ff_file( meta_file, manager, log_error_fmt="Could not load cache for {}: ", id=id ) - if meta is None: - return None else: meta = _load_json_file( meta_file, @@ -1721,13 +1727,8 @@ def find_cache_meta( log_success=f"Meta {id} ", log_error=f"Could not load cache for {id}: ", ) - if meta is None: - return None - if not isinstance(meta, dict): - manager.log( # type: ignore[unreachable] - f"Could not load cache for {id}: meta cache is not a dict: {repr(meta)}" - ) - return None + if meta is None: + return None, [] if manager.stats_enabled: t1 = time.time() if isinstance(meta, bytes): @@ -1736,31 +1737,31 @@ def find_cache_meta( # TODO: switch to something like librt.internal.read_byte() if this is slow. if meta[0] != cache_version() or meta[1] != CACHE_VERSION: manager.log(f"Metadata abandoned for {id}: incompatible cache format") - return None + return None, [] data_io = ReadBuffer(meta[2:]) m = CacheMeta.read(data_io, data_file) else: m = CacheMeta.deserialize(meta, data_file) if m is None: manager.log(f"Metadata abandoned for {id}: cannot deserialize data") - return None + return None, [] if manager.stats_enabled: t2 = time.time() manager.add_stats( load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1 ) if skip_validation: - return m + return m, [] # Ignore cache if generated by an older mypy version. 
if m.version_id != manager.version_id and not manager.options.skip_version_check: manager.log(f"Metadata abandoned for {id}: different mypy version") - return None + return None, [] total_deps = len(m.dependencies) + len(m.suppressed) if len(m.dep_prios) != total_deps or len(m.dep_lines) != total_deps: manager.log(f"Metadata abandoned for {id}: broken dependencies") - return None + return None, [] # Ignore cache if (relevant) options aren't the same. # Note that it's fine to mutilate cached_options since it's only used here. @@ -1782,12 +1783,12 @@ def find_cache_meta( key, cached_options.get(key), current_options.get(key) ) ) - return None + return None, [] if manager.old_plugins_snapshot and manager.plugins_snapshot: # Check if plugins are still the same. if manager.plugins_snapshot != manager.old_plugins_snapshot: manager.log(f"Metadata abandoned for {id}: plugins differ") - return None + return None, [] plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=True)) if not manager.options.fixed_format_cache: # So that plugins can return data with tuples in it without @@ -1796,10 +1797,31 @@ def find_cache_meta( plugin_data = json_loads(json_dumps(plugin_data)) if m.plugin_data != plugin_data: manager.log(f"Metadata abandoned for {id}: plugin configuration differs") - return None + return None, [] + # Load cached errors for this file, even if empty. This is needed to avoid + # invalid cache state after a crash/blocker/Ctrl+C etc. 
+ errors_file = get_errors_name(meta_file) + if manager.options.fixed_format_cache: + errors = _load_ff_file( + errors_file, manager, log_error_fmt="Could not load errors for {}: ", id=id + ) + else: + errors = _load_json_file( + errors_file, + manager, + log_success=f"Errors {id} ", + log_error=f"Could not load errors for {id}: ", + ) + if errors is None: + return None, [] + if isinstance(errors, bytes): + data_io = ReadBuffer(errors) + e = read_errors(data_io) + else: + e = [tuple(err) for err in errors["error_lines"]] manager.add_stats(fresh_metas=1) - return m + return m, e def validate_meta( @@ -2078,9 +2100,8 @@ def write_cache( version_id=manager.version_id, ignore_all=ignore_all, plugin_data=plugin_data, - # These two will be filled by the caller. + # This one will be filled by the caller. dep_hashes=[], - error_lines=[], ) return interface_hash, (meta, meta_file) @@ -2104,6 +2125,23 @@ def write_cache_meta(meta: CacheMeta, manager: BuildManager, meta_file: str) -> manager.log(f"Error writing cache meta file {meta_file}") +def write_errors_file( + meta_file: str, error_lines: list[ErrorTuple], manager: BuildManager +) -> None: + # Write errors cache file + errors_file = get_errors_name(meta_file) + metastore = manager.metastore + if manager.options.fixed_format_cache: + data_io = WriteBuffer() + write_errors(data_io, error_lines) + meta_bytes = data_io.getvalue() + else: + # Some generic JSON helpers require top-level to be a dict. + meta_bytes = json_dumps({"error_lines": error_lines}, manager.options.debug_cache) + if not metastore.write(errors_file, meta_bytes): + manager.log(f"Error writing errors file {errors_file}") + + """Dependency manager. Design @@ -2393,7 +2431,7 @@ def new_state( interface_hash = b"" meta_source_hash = None if path and source is None and manager.cache_enabled: - meta = find_cache_meta(id, path, manager) + meta, error_lines = find_cache_meta(id, path, manager) # TODO: Get mtime if not cached. 
if meta is not None: interface_hash = meta.interface_hash @@ -2420,7 +2458,7 @@ def new_state( assert len(meta.dep_hashes) == len(meta.dependencies) dep_hashes = {k: v for (k, v) in zip(meta.dependencies, meta.dep_hashes)} # Only copy `error_lines` if the module is not silently imported. - error_lines = [] if ignore_all else meta.error_lines + error_lines = [] if ignore_all else error_lines imports_ignored = meta.imports_ignored else: dependencies = [] @@ -2656,7 +2694,7 @@ def reload_meta(self) -> None: the interface hash. """ assert self.path is not None - self.meta = find_cache_meta(self.id, self.path, self.manager, skip_validation=True) + self.meta, _ = find_cache_meta(self.id, self.path, self.manager, skip_validation=True) assert self.meta is not None self.interface_hash = self.meta.interface_hash @@ -4355,8 +4393,8 @@ def process_stale_scc( continue meta, meta_file = meta_tuple meta.dep_hashes = [graph[dep].interface_hash for dep in graph[id].dependencies] - meta.error_lines = errors_by_id.get(id, []) write_cache_meta(meta, manager, meta_file) + write_errors_file(meta_file, errors_by_id.get(id, []), manager) manager.done_sccs.add(ascc.id) manager.add_stats( load_missing_time=t1 - t0, diff --git a/mypy/cache.py b/mypy/cache.py index 5a1d6c79219e..0adc7affb8cb 100644 --- a/mypy/cache.py +++ b/mypy/cache.py @@ -69,7 +69,7 @@ from mypy_extensions import u8 # High-level cache layout format -CACHE_VERSION: Final = 6 +CACHE_VERSION: Final = 7 # Type used internally to represent errors: # (path, line, column, end_line, end_column, severity, message, code) @@ -99,7 +99,6 @@ def __init__( dep_hashes: list[bytes], interface_hash: bytes, trans_dep_hash: bytes, - error_lines: list[ErrorTuple], version_id: str, ignore_all: bool, plugin_data: Any, @@ -123,7 +122,6 @@ def __init__( self.dep_hashes = dep_hashes # list of interface_hash for dependencies self.interface_hash = interface_hash # hash representing the public interface self.trans_dep_hash = trans_dep_hash # hash of 
import structure (transitive) - self.error_lines = error_lines self.version_id = version_id # mypy version for cache invalidation self.ignore_all = ignore_all # if errors were ignored self.plugin_data = plugin_data # config data from plugins @@ -146,7 +144,6 @@ def serialize(self) -> dict[str, Any]: "dep_hashes": [dep.hex() for dep in self.dep_hashes], "interface_hash": self.interface_hash.hex(), "trans_dep_hash": self.trans_dep_hash.hex(), - "error_lines": self.error_lines, "version_id": self.version_id, "ignore_all": self.ignore_all, "plugin_data": self.plugin_data, @@ -175,7 +172,6 @@ def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None: dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]], interface_hash=bytes.fromhex(meta["interface_hash"]), trans_dep_hash=bytes.fromhex(meta["trans_dep_hash"]), - error_lines=[tuple(err) for err in meta["error_lines"]], version_id=meta["version_id"], ignore_all=meta["ignore_all"], plugin_data=meta["plugin_data"], @@ -203,7 +199,6 @@ def write(self, data: WriteBuffer) -> None: write_bytes_list(data, self.dep_hashes) write_bytes(data, self.interface_hash) write_bytes(data, self.trans_dep_hash) - write_errors(data, self.error_lines) write_str(data, self.version_id) write_bool(data, self.ignore_all) # Plugin data may be not a dictionary, so we use @@ -233,7 +228,6 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None: dep_hashes=read_bytes_list(data), interface_hash=read_bytes(data), trans_dep_hash=read_bytes(data), - error_lines=read_errors(data), version_id=read_str(data), ignore_all=read_bool(data), plugin_data=read_json_value(data), diff --git a/mypy/exportjson.py b/mypy/exportjson.py index 922b3ecb3088..c08f0f9f2911 100644 --- a/mypy/exportjson.py +++ b/mypy/exportjson.py @@ -572,7 +572,6 @@ def convert_binary_cache_meta_to_json(data: bytes, data_file: str) -> Json: "dep_lines": meta.dep_lines, "dep_hashes": [dep.hex() for dep in meta.dep_hashes], "interface_hash": 
meta.interface_hash.hex(), - "error_lines": meta.error_lines, "version_id": meta.version_id, "ignore_all": meta.ignore_all, "plugin_data": meta.plugin_data, diff --git a/mypy/test/test_diff_cache.py b/mypy/test/test_diff_cache.py index a62c3a18c696..d35cd99f342d 100644 --- a/mypy/test/test_diff_cache.py +++ b/mypy/test/test_diff_cache.py @@ -127,7 +127,7 @@ def test_diff_cache_produces_valid_json(self) -> None: a_keys = {k for k in keys if "/a." in k or k.startswith("a.")} assert len(a_keys) == 0, f"Unexpected a.* entries in diff: {a_keys}" assert len(b_keys) == 2, f"Expected 2 b.* entries in diff, got: {b_keys}" - assert len(c_keys) == 3, f"Expected 3 c.* entries in diff, got: {c_keys}" + assert len(c_keys) == 4, f"Expected 4 c.* entries in diff, got: {c_keys}" # The new access to a.x in b.py should create a fine-grained # dependency recorded in @root.deps.json. diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index cbb9c0235fed..96d6f536b961 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -293,7 +293,7 @@ def find_missing_cache_files( ignore_errors = True missing = {} for id, path in modules.items(): - meta = build.find_cache_meta(id, path, manager) + meta, _ = build.find_cache_meta(id, path, manager) if not build.validate_meta(meta, id, path, ignore_errors, manager): missing[id] = path return set(missing.values()) diff --git a/test-data/unit/exportjson.test b/test-data/unit/exportjson.test index 43e061dfda32..2d6a8c56b20f 100644 --- a/test-data/unit/exportjson.test +++ b/test-data/unit/exportjson.test @@ -317,7 +317,6 @@ from typing_extensions import Final "" ], "interface_hash": "", - "error_lines": [], "version_id": ..., "ignore_all": false, "plugin_data": null