From 9227ccad6fc24db11a4a9283754ae091958ba874 Mon Sep 17 00:00:00 2001 From: p1c2u Date: Sun, 1 Mar 2026 19:48:08 +0000 Subject: [PATCH 1/6] Speed up validate() with validator caching and optional schema-check fast path --- benchmarks/__init__.py | 0 benchmarks/cases.py | 171 +++++++++++++++++++ benchmarks/compare.py | 169 +++++++++++++++++++ benchmarks/run.py | 229 ++++++++++++++++++++++++++ docs/contributing.rst | 23 +++ openapi_schema_validator/_caches.py | 107 ++++++++++++ openapi_schema_validator/settings.py | 23 +++ openapi_schema_validator/shortcuts.py | 76 +++++++-- poetry.lock | 50 +++++- pyproject.toml | 2 + tests/unit/test_shortcut.py | 71 ++++++++ 11 files changed, 899 insertions(+), 22 deletions(-) create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/cases.py create mode 100644 benchmarks/compare.py create mode 100644 benchmarks/run.py create mode 100644 openapi_schema_validator/_caches.py create mode 100644 openapi_schema_validator/settings.py diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/cases.py b/benchmarks/cases.py new file mode 100644 index 0000000..6a0d817 --- /dev/null +++ b/benchmarks/cases.py @@ -0,0 +1,171 @@ +from dataclasses import dataclass +from typing import Any + +from referencing import Registry +from referencing import Resource +from referencing.jsonschema import DRAFT202012 + +from openapi_schema_validator import OAS30Validator +from openapi_schema_validator import OAS31Validator +from openapi_schema_validator import OAS32Validator +from openapi_schema_validator import oas30_format_checker +from openapi_schema_validator import oas31_format_checker +from openapi_schema_validator import oas32_format_checker + + +@dataclass(frozen=True) +class BenchmarkCase: + name: str + validator_class: Any + schema: dict[str, Any] + instance: Any + validator_kwargs: dict[str, Any] + + +def build_cases() -> list[BenchmarkCase]: + name_schema = 
Resource.from_contents( + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "string", + } + ) + age_schema = DRAFT202012.create_resource( + { + "type": "integer", + "format": "int32", + "minimum": 0, + "maximum": 120, + } + ) + registry = Registry().with_resources( + [ + ("urn:name-schema", name_schema), + ("urn:age-schema", age_schema), + ] + ) + + return [ + BenchmarkCase( + name="oas32_simple_object", + validator_class=OAS32Validator, + schema={ + "type": "object", + "required": ["name"], + "properties": { + "name": {"type": "string"}, + "enabled": {"type": "boolean"}, + }, + "additionalProperties": False, + }, + instance={"name": "svc", "enabled": True}, + validator_kwargs={"format_checker": oas32_format_checker}, + ), + BenchmarkCase( + name="oas31_prefix_items", + validator_class=OAS31Validator, + schema={ + "type": "array", + "prefixItems": [ + {"type": "number"}, + {"type": "string"}, + {"enum": ["Street", "Avenue", "Boulevard"]}, + {"enum": ["NW", "NE", "SW", "SE"]}, + ], + "items": False, + }, + instance=[1600, "Pennsylvania", "Avenue", "NW"], + validator_kwargs={"format_checker": oas31_format_checker}, + ), + BenchmarkCase( + name="oas30_nullable", + validator_class=OAS30Validator, + schema={"type": "string", "nullable": True}, + instance=None, + validator_kwargs={"format_checker": oas30_format_checker}, + ), + BenchmarkCase( + name="oas30_discriminator", + validator_class=OAS30Validator, + schema={ + "$ref": "#/components/schemas/Route", + "components": { + "schemas": { + "MountainHiking": { + "type": "object", + "properties": { + "discipline": { + "type": "string", + "enum": [ + "mountain_hiking", + "MountainHiking", + ], + }, + "length": {"type": "integer"}, + }, + "required": ["discipline", "length"], + }, + "AlpineClimbing": { + "type": "object", + "properties": { + "discipline": { + "type": "string", + "enum": ["alpine_climbing"], + }, + "height": {"type": "integer"}, + }, + "required": ["discipline", "height"], + }, + 
"Route": { + "oneOf": [ + { + "$ref": ( + "#/components/schemas/" + "MountainHiking" + ) + }, + { + "$ref": ( + "#/components/schemas/" + "AlpineClimbing" + ) + }, + ], + "discriminator": { + "propertyName": "discipline", + "mapping": { + "mountain_hiking": ( + "#/components/schemas/" + "MountainHiking" + ), + "alpine_climbing": ( + "#/components/schemas/" + "AlpineClimbing" + ), + }, + }, + }, + } + }, + }, + instance={"discipline": "mountain_hiking", "length": 10}, + validator_kwargs={"format_checker": oas30_format_checker}, + ), + BenchmarkCase( + name="oas32_registry_refs", + validator_class=OAS32Validator, + schema={ + "type": "object", + "required": ["name"], + "properties": { + "name": {"$ref": "urn:name-schema"}, + "age": {"$ref": "urn:age-schema"}, + }, + "additionalProperties": False, + }, + instance={"name": "John", "age": 23}, + validator_kwargs={ + "format_checker": oas32_format_checker, + "registry": registry, + }, + ), + ] diff --git a/benchmarks/compare.py b/benchmarks/compare.py new file mode 100644 index 0000000..b4dfde9 --- /dev/null +++ b/benchmarks/compare.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any + +LOWER_IS_BETTER_METRICS = { + "compile_ms", + "first_validate_ms", + "compiled_peak_memory_kib", +} +HIGHER_IS_BETTER_METRICS = { + "compiled_validations_per_second", + "helper_validations_per_second", + "helper_trusted_validations_per_second", +} +ALL_METRICS = [ + "compile_ms", + "first_validate_ms", + "compiled_validations_per_second", + "helper_validations_per_second", + "helper_trusted_validations_per_second", + "compiled_peak_memory_kib", +] + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Compare two benchmark JSON reports.", + ) + parser.add_argument( + "--baseline", + type=Path, + required=True, + help="Path to baseline benchmark JSON.", + ) + parser.add_argument( + "--candidate", + type=Path, + 
required=True, + help="Path to candidate benchmark JSON.", + ) + parser.add_argument( + "--regression-threshold", + type=float, + default=0.0, + help=( + "Percent threshold for regressions. " + "Example: 5 means fail only when regression exceeds 5%%." + ), + ) + parser.add_argument( + "--fail-on-regression", + action="store_true", + help="Exit with status 1 if regressions exceed threshold.", + ) + return parser.parse_args() + + +def _load_report(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def _cases_by_name(report: dict[str, Any]) -> dict[str, dict[str, Any]]: + return {case["name"]: case for case in report["cases"]} + + +def _percent_change(baseline_value: float, candidate_value: float) -> float: + if baseline_value == 0: + if candidate_value == 0: + return 0.0 + return float("inf") + return ((candidate_value - baseline_value) / baseline_value) * 100.0 + + +def _is_regression(metric: str, percent_change: float) -> bool: + if metric in LOWER_IS_BETTER_METRICS: + return percent_change > 0 + return percent_change < 0 + + +def _format_status(is_regression: bool, percent_change: float) -> str: + if abs(percent_change) < 1e-12: + return "no change (0.00%)" + + direction = "regression" if is_regression else "improvement" + sign = "+" if percent_change >= 0 else "" + return f"{direction} ({sign}{percent_change:.2f}%)" + + +def _compare_reports( + baseline: dict[str, Any], + candidate: dict[str, Any], + regression_threshold: float, +) -> tuple[list[str], list[str]]: + baseline_cases = _cases_by_name(baseline) + candidate_cases = _cases_by_name(candidate) + + report_lines: list[str] = [] + regressions: list[str] = [] + + for case_name in sorted(baseline_cases): + if case_name not in candidate_cases: + regressions.append( + f"Missing case in candidate report: {case_name}" + ) + continue + + report_lines.append(f"Case: {case_name}") + baseline_case = baseline_cases[case_name] + candidate_case = candidate_cases[case_name] + + for 
metric in ALL_METRICS: + baseline_value = float(baseline_case[metric]) + candidate_value = float(candidate_case[metric]) + change = _percent_change(baseline_value, candidate_value) + regression = _is_regression(metric, change) + status = _format_status(regression, change) + + report_lines.append( + " " + f"{metric}: baseline={baseline_value:.6f} " + f"candidate={candidate_value:.6f} -> {status}" + ) + + if regression and abs(change) > regression_threshold: + regressions.append( + f"{case_name} {metric} regressed by {abs(change):.2f}%" + ) + + extra_candidate_cases = set(candidate_cases).difference(baseline_cases) + for case_name in sorted(extra_candidate_cases): + report_lines.append(f"Case present only in candidate: {case_name}") + + return report_lines, regressions + + +def main() -> int: + args = _parse_args() + baseline = _load_report(args.baseline) + candidate = _load_report(args.candidate) + report_lines, regressions = _compare_reports( + baseline, + candidate, + args.regression_threshold, + ) + + print( + f"Comparing candidate {args.candidate} " + f"against baseline {args.baseline}" + ) + print("") + print("\n".join(report_lines)) + + if regressions: + print("") + print("Regressions above threshold:") + for regression in regressions: + print(f"- {regression}") + + if args.fail_on_regression: + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/run.py b/benchmarks/run.py new file mode 100644 index 0000000..96db549 --- /dev/null +++ b/benchmarks/run.py @@ -0,0 +1,229 @@ +from __future__ import annotations + +import argparse +import gc +import json +import platform +import statistics +import time +import tracemalloc +from datetime import datetime +from datetime import timezone +from pathlib import Path +from typing import Any + +from benchmarks.cases import BenchmarkCase +from benchmarks.cases import build_cases +from openapi_schema_validator.shortcuts import _clear_validate_cache +from 
openapi_schema_validator.shortcuts import validate + + +def _measure_compile_time_ms( + case: BenchmarkCase, + rounds: int, +) -> float: + samples: list[float] = [] + for _ in range(rounds): + start_ns = time.perf_counter_ns() + case.validator_class( + case.schema, + **case.validator_kwargs, + ) + elapsed_ms = (time.perf_counter_ns() - start_ns) / 1_000_000 + samples.append(elapsed_ms) + return statistics.median(samples) + + +def _measure_first_validate_ms(case: BenchmarkCase) -> float: + validator = case.validator_class(case.schema, **case.validator_kwargs) + start_ns = time.perf_counter_ns() + validator.validate(case.instance) + return (time.perf_counter_ns() - start_ns) / 1_000_000 + + +def _measure_compiled_validate_per_second( + case: BenchmarkCase, + iterations: int, + warmup: int, +) -> float: + validator = case.validator_class(case.schema, **case.validator_kwargs) + for _ in range(warmup): + validator.validate(case.instance) + + start_ns = time.perf_counter_ns() + for _ in range(iterations): + validator.validate(case.instance) + elapsed = (time.perf_counter_ns() - start_ns) / 1_000_000_000 + return iterations / elapsed + + +def _measure_helper_validate_per_second( + case: BenchmarkCase, + iterations: int, + warmup: int, + *, + check_schema: bool, +) -> float: + _clear_validate_cache() + for _ in range(warmup): + validate( + case.instance, + case.schema, + cls=case.validator_class, + check_schema=check_schema, + **case.validator_kwargs, + ) + + start_ns = time.perf_counter_ns() + for _ in range(iterations): + validate( + case.instance, + case.schema, + cls=case.validator_class, + check_schema=check_schema, + **case.validator_kwargs, + ) + elapsed = (time.perf_counter_ns() - start_ns) / 1_000_000_000 + return iterations / elapsed + + +def _measure_peak_memory_kib( + case: BenchmarkCase, + iterations: int, +) -> float: + validator = case.validator_class(case.schema, **case.validator_kwargs) + tracemalloc.start() + for _ in range(iterations): + 
validator.validate(case.instance) + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + return peak / 1024 + + +def _measure_case( + case: BenchmarkCase, + iterations: int, + warmup: int, + compile_rounds: int, +) -> dict[str, Any]: + gc_enabled = gc.isenabled() + gc.disable() + + try: + return { + "name": case.name, + "validator_class": case.validator_class.__name__, + "compile_ms": _measure_compile_time_ms(case, compile_rounds), + "first_validate_ms": _measure_first_validate_ms(case), + "compiled_validations_per_second": ( + _measure_compiled_validate_per_second( + case, + iterations, + warmup, + ) + ), + "helper_validations_per_second": ( + _measure_helper_validate_per_second( + case, + iterations, + warmup, + check_schema=True, + ) + ), + "helper_trusted_validations_per_second": ( + _measure_helper_validate_per_second( + case, + iterations, + warmup, + check_schema=False, + ) + ), + "compiled_peak_memory_kib": _measure_peak_memory_kib( + case, + max(iterations, 100), + ), + } + finally: + if gc_enabled: + gc.enable() + + +def _build_report( + cases: list[BenchmarkCase], + iterations: int, + warmup: int, + compile_rounds: int, +) -> dict[str, Any]: + results = [ + _measure_case( + case, + iterations=iterations, + warmup=warmup, + compile_rounds=compile_rounds, + ) + for case in cases + ] + return { + "timestamp_utc": datetime.now(timezone.utc).isoformat(), + "python_version": platform.python_version(), + "platform": platform.platform(), + "benchmark_parameters": { + "iterations": iterations, + "warmup": warmup, + "compile_rounds": compile_rounds, + }, + "cases": results, + } + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run validation performance benchmarks.", + ) + parser.add_argument( + "--iterations", + type=int, + default=1000, + help="Measured validation iterations per case.", + ) + parser.add_argument( + "--warmup", + type=int, + default=100, + help="Warmup iterations per case.", + ) + 
parser.add_argument( + "--compile-rounds", + type=int, + default=50, + help="Schema compile measurements per case.", + ) + parser.add_argument( + "--output", + type=Path, + default=Path("reports/benchmarks/python-baseline.json"), + help="Path to write JSON benchmark report.", + ) + return parser.parse_args() + + +def main() -> int: + args = _parse_args() + cases = build_cases() + report = _build_report( + cases, + iterations=args.iterations, + warmup=args.warmup, + compile_rounds=args.compile_rounds, + ) + + output_path = args.output + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(report, indent=2), encoding="utf-8") + + print(f"Saved benchmark report to {output_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/contributing.rst b/docs/contributing.rst index 614fb06..64f9f27 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -74,3 +74,26 @@ To run all checks on all files, enter: pre-commit run --all-files Pre-commit check results are also attached to your PR through integration with Github Action. + +Performance benchmark +^^^^^^^^^^^^^^^^^^^^^ + +The ``validate`` shortcut uses an internal compiled-validator cache. You can +adjust cache capacity with the ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` +environment variable (default: ``128``). + +To collect a local benchmark report for validation performance, run: + +.. code-block:: console + + poetry run python benchmarks/run.py --output reports/benchmarks/current.json + +To compare two benchmark reports and optionally fail on regressions, run: + +.. 
code-block:: console + + poetry run python benchmarks/compare.py \ + --baseline reports/benchmarks/baseline.json \ + --candidate reports/benchmarks/current.json \ + --regression-threshold 5 \ + --fail-on-regression diff --git a/openapi_schema_validator/_caches.py b/openapi_schema_validator/_caches.py new file mode 100644 index 0000000..2d02405 --- /dev/null +++ b/openapi_schema_validator/_caches.py @@ -0,0 +1,107 @@ +from collections import OrderedDict +from dataclasses import dataclass +from threading import RLock +from typing import Any +from typing import Hashable +from typing import Mapping + +from jsonschema.protocols import Validator + +from openapi_schema_validator.settings import get_settings + + +@dataclass +class CachedValidator: + validator: Any + schema_checked: bool + + +class ValidatorCache: + def __init__(self) -> None: + self._cache: OrderedDict[Hashable, CachedValidator] = OrderedDict() + self._lock = RLock() + + def _freeze_value(self, value: Any) -> Hashable: + if isinstance(value, dict): + return tuple( + sorted( + (str(key), self._freeze_value(item)) + for key, item in value.items() + ) + ) + if isinstance(value, list): + return tuple(self._freeze_value(item) for item in value) + if isinstance(value, tuple): + return tuple(self._freeze_value(item) for item in value) + if isinstance(value, set): + return tuple( + sorted( + (self._freeze_value(item) for item in value), + key=repr, + ) + ) + if isinstance(value, (str, bytes, int, float, bool, type(None))): + return value + return ("id", id(value)) + + def _schema_fingerprint(self, schema: Mapping[str, Any]) -> Hashable: + return self._freeze_value(dict(schema)) + + def build_key( + self, + schema: Mapping[str, Any], + cls: type[Validator], + args: tuple[Any, ...], + kwargs: Mapping[str, Any], + allow_remote_references: bool, + ) -> Hashable: + return ( + cls, + allow_remote_references, + self._schema_fingerprint(schema), + self._freeze_value(args), + self._freeze_value(dict(kwargs)), + ) + + def 
get(self, key: Hashable) -> CachedValidator | None: + with self._lock: + return self._cache.get(key) + + def set( + self, + key: Hashable, + *, + validator: Any, + schema_checked: bool, + ) -> CachedValidator: + cached = CachedValidator( + validator=validator, + schema_checked=schema_checked, + ) + with self._lock: + self._cache[key] = cached + self._cache.move_to_end(key) + self._prune_if_needed() + return cached + + def mark_schema_checked(self, key: Hashable) -> None: + with self._lock: + cached = self._cache.get(key) + if cached is None: + return + cached.schema_checked = True + self._cache.move_to_end(key) + + def touch(self, key: Hashable) -> None: + with self._lock: + if key in self._cache: + self._cache.move_to_end(key) + + def clear(self) -> None: + with self._lock: + self._cache.clear() + + def _prune_if_needed(self) -> None: + max_size = get_settings().validate_cache_max_size + while len(self._cache) > max_size: + self._cache.popitem(last=False) diff --git a/openapi_schema_validator/settings.py b/openapi_schema_validator/settings.py new file mode 100644 index 0000000..2926417 --- /dev/null +++ b/openapi_schema_validator/settings.py @@ -0,0 +1,23 @@ +from functools import lru_cache + +from pydantic import Field +from pydantic_settings import BaseSettings +from pydantic_settings import SettingsConfigDict + + +class OpenAPISchemaValidatorSettings(BaseSettings): + model_config = SettingsConfigDict( + env_prefix="OPENAPI_SCHEMA_VALIDATOR_", + extra="ignore", + ) + + validate_cache_max_size: int = Field(default=128, ge=0) + + +@lru_cache(maxsize=1) +def get_settings() -> OpenAPISchemaValidatorSettings: + return OpenAPISchemaValidatorSettings() + + +def reset_settings_cache() -> None: + get_settings.cache_clear() diff --git a/openapi_schema_validator/shortcuts.py b/openapi_schema_validator/shortcuts.py index adf35b8..9bb5862 100644 --- a/openapi_schema_validator/shortcuts.py +++ b/openapi_schema_validator/shortcuts.py @@ -1,3 +1,5 @@ +from __future__ import 
annotations + from typing import Any from typing import Mapping from typing import cast @@ -6,11 +8,32 @@ from jsonschema.protocols import Validator from referencing import Registry +from openapi_schema_validator._caches import ValidatorCache from openapi_schema_validator._dialects import OAS31_BASE_DIALECT_ID from openapi_schema_validator._dialects import OAS32_BASE_DIALECT_ID from openapi_schema_validator.validators import OAS32Validator from openapi_schema_validator.validators import check_openapi_schema +_LOCAL_ONLY_REGISTRY = Registry() +_VALIDATOR_CACHE = ValidatorCache() + + +def _check_schema( + cls: type[Validator], + schema: dict[str, Any], +) -> None: + meta_schema = getattr(cls, "META_SCHEMA", None) + # jsonschema's default check_schema path does not accept a custom + # registry, so for OAS dialects we use the package registry + # explicitly to keep metaschema resolution local and deterministic. + if isinstance(meta_schema, dict) and meta_schema.get("$id") in ( + OAS31_BASE_DIALECT_ID, + OAS32_BASE_DIALECT_ID, + ): + check_openapi_schema(cls, schema) + else: + cls.check_schema(schema) + def validate( instance: Any, @@ -18,7 +41,8 @@ def validate( cls: type[Validator] = OAS32Validator, *args: Any, allow_remote_references: bool = False, - **kwargs: Any + check_schema: bool = True, + **kwargs: Any, ) -> None: """ Validate an instance against a given schema using the specified @@ -38,6 +62,9 @@ def validate( allow_remote_references: If ``True`` and no explicit ``registry`` is provided, allow jsonschema's default remote reference retrieval behavior. + check_schema: If ``True`` (default), validate the provided schema + before validating ``instance``. If ``False``, skip schema + validation and run instance validation directly. **kwargs: Keyword arguments forwarded to ``cls`` constructor (for example ``registry`` and ``format_checker``). 
If omitted, a local-only empty ``Registry`` is used to avoid implicit remote @@ -49,25 +76,42 @@ def validate( """ schema_dict = cast(dict[str, Any], schema) - meta_schema = getattr(cls, "META_SCHEMA", None) - # jsonschema's default check_schema path does not accept a custom - # registry, so for OAS dialects we use the package registry - # explicitly to keep metaschema resolution local and deterministic. - if isinstance(meta_schema, dict) and meta_schema.get("$id") in ( - OAS31_BASE_DIALECT_ID, - OAS32_BASE_DIALECT_ID, - ): - check_openapi_schema(cls, schema_dict) - else: - cls.check_schema(schema_dict) - validator_kwargs = kwargs.copy() if not allow_remote_references: - validator_kwargs.setdefault("registry", Registry()) + validator_kwargs.setdefault("registry", _LOCAL_ONLY_REGISTRY) + + key = _VALIDATOR_CACHE.build_key( + schema=schema, + cls=cls, + args=args, + kwargs=kwargs, + allow_remote_references=allow_remote_references, + ) + + cached = _VALIDATOR_CACHE.get(key) + + if cached is None: + if check_schema: + _check_schema(cls, schema_dict) + + validator = cls(schema_dict, *args, **validator_kwargs) + cached = _VALIDATOR_CACHE.set( + key, + validator=validator, + schema_checked=check_schema, + ) + elif check_schema and not cached.schema_checked: + _check_schema(cls, schema_dict) + _VALIDATOR_CACHE.mark_schema_checked(key) + else: + _VALIDATOR_CACHE.touch(key) - validator = cls(schema_dict, *args, **validator_kwargs) error = best_match( - validator.evolve(schema=schema_dict).iter_errors(instance) + cached.validator.evolve(schema=schema_dict).iter_errors(instance) ) if error is not None: raise error + + +def clear_validate_cache() -> None: + _VALIDATOR_CACHE.clear() diff --git a/poetry.lock b/poetry.lock index 6c5964b..88be909 100644 --- a/poetry.lock +++ b/poetry.lock @@ -18,7 +18,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["main", "docs"] 
files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -1081,7 +1081,7 @@ version = "2.12.5" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"}, {file = "pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49"}, @@ -1103,7 +1103,7 @@ version = "2.41.5" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, @@ -1256,6 +1256,30 @@ pycountry = ["pycountry (>=23)"] python-ulid = ["python-ulid (>=1,<2) ; python_version < \"3.9\"", "python-ulid (>=1,<4) ; python_version >= \"3.9\""] semver = ["semver (>=3.0.2)"] +[[package]] +name = "pydantic-settings" +version = "2.13.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237"}, + {file = "pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" +typing-inspection = ">=0.4.0" + 
+[package.extras] +aws-secrets-manager = ["boto3 (>=1.35.0)", "boto3-stubs[secretsmanager]"] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +gcp-secret-manager = ["google-cloud-secret-manager (>=2.23.1)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pyflakes" version = "3.4.0" @@ -1351,6 +1375,21 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["pytest (>=6,!=8.1.*)"] type = ["pytest-mypy"] +[[package]] +name = "python-dotenv" +version = "1.2.2" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a"}, + {file = "python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytokens" version = "0.4.1" @@ -2127,7 +2166,6 @@ files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] -markers = {main = "python_version < \"3.13\""} [[package]] name = "typing-inspection" @@ -2135,7 +2173,7 @@ version = "0.4.2" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, @@ -2203,4 +2241,4 @@ ecma-regex = ["regress"] [metadata] lock-version = "2.1" python-versions = "^3.10.0" -content-hash = 
"dd73974c171fe06ed00a48c4b069c177885162945d22079c1d71cd7bc70e0a11" +content-hash = "8fe026e86eb5cc5fca2b05d1f0b4c3313eca02c7eebb30bff216d1d381a95ee7" diff --git a/pyproject.toml b/pyproject.toml index 734d80a..67a2c94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,8 @@ rfc3339-validator = "*" # requred by jsonschema for date-time checker jsonschema-specifications = ">=2024.10.1" referencing = "^0.37.0" regress = {version = ">=2025.10.1", optional = true} +pydantic = "^2.0.0" +pydantic-settings = "^2.0.0" [tool.poetry.extras] docs = ["sphinx", "sphinx-immaterial"] diff --git a/tests/unit/test_shortcut.py b/tests/unit/test_shortcut.py index f171f6f..e8fed51 100644 --- a/tests/unit/test_shortcut.py +++ b/tests/unit/test_shortcut.py @@ -1,12 +1,17 @@ import inspect +import re from unittest.mock import patch import pytest +from jsonschema.exceptions import SchemaError from referencing import Registry from referencing import Resource from openapi_schema_validator import OAS32Validator from openapi_schema_validator import validate +from openapi_schema_validator._regex import has_ecma_regex +from openapi_schema_validator.settings import reset_settings_cache +from openapi_schema_validator.shortcuts import clear_validate_cache @pytest.fixture(scope="function") @@ -23,6 +28,15 @@ def schema(): } +@pytest.fixture(autouse=True) +def clear_validate_cache_fixture(): + reset_settings_cache() + clear_validate_cache() + yield + clear_validate_cache() + reset_settings_cache() + + def test_validate_does_not_add_nullable_to_schema(schema): """ Verify that calling validate does not add the 'nullable' key to the schema @@ -118,3 +132,60 @@ def test_validate_can_allow_implicit_remote_references(): validate({}, schema, allow_remote_references=True) assert urlopen.called + + +def test_validate_skip_schema_check(): + schema = {"type": "string", "pattern": "["} + + with pytest.raises(SchemaError, match="is not a 'regex'"): + validate("foo", schema) + + if has_ecma_regex(): + 
with pytest.raises(Exception): + validate("foo", schema, check_schema=False) + else: + with pytest.raises(re.error): + validate("foo", schema, check_schema=False) + + +def test_validate_cache_avoids_rechecking_schema(schema): + with patch( + "openapi_schema_validator.shortcuts.check_openapi_schema" + ) as check_schema_mock: + validate({"email": "foo@bar.com"}, schema, cls=OAS32Validator) + validate({"email": "foo@bar.com"}, schema, cls=OAS32Validator) + + check_schema_mock.assert_called_once() + + +def test_validate_cache_promotes_unchecked_validator(schema): + with patch( + "openapi_schema_validator.shortcuts.check_openapi_schema" + ) as check_schema_mock: + validate( + {"email": "foo@bar.com"}, + schema, + cls=OAS32Validator, + check_schema=False, + ) + validate({"email": "foo@bar.com"}, schema, cls=OAS32Validator) + validate({"email": "foo@bar.com"}, schema, cls=OAS32Validator) + + check_schema_mock.assert_called_once() + + +def test_validate_cache_max_size_from_env(monkeypatch): + schema_a = {"type": "string"} + schema_b = {"type": "integer"} + + monkeypatch.setenv("OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE", "1") + reset_settings_cache() + + with patch( + "openapi_schema_validator.shortcuts.check_openapi_schema" + ) as check_schema_mock: + validate("foo", schema_a, cls=OAS32Validator) + validate(1, schema_b, cls=OAS32Validator) + validate("foo", schema_a, cls=OAS32Validator) + + assert check_schema_mock.call_count == 3 From e3032c8a7bb415ae1104df1c1dd6bca448eb671c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:15:27 +0000 Subject: [PATCH 2/6] Initial plan From 244a5c7eb25b74669b7f7a6ce5758beb3a78a921 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:17:51 +0000 Subject: [PATCH 3/6] Fix review comments: ValidationError in test, clear_validate_cache import, GitHub Actions doc Co-authored-by: p1c2u 
<1679024+p1c2u@users.noreply.github.com> --- benchmarks/run.py | 4 ++-- docs/contributing.rst | 2 +- tests/unit/test_shortcut.py | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/benchmarks/run.py b/benchmarks/run.py index 96db549..6ac954d 100644 --- a/benchmarks/run.py +++ b/benchmarks/run.py @@ -14,7 +14,7 @@ from benchmarks.cases import BenchmarkCase from benchmarks.cases import build_cases -from openapi_schema_validator.shortcuts import _clear_validate_cache +from openapi_schema_validator.shortcuts import clear_validate_cache from openapi_schema_validator.shortcuts import validate @@ -64,7 +64,7 @@ def _measure_helper_validate_per_second( *, check_schema: bool, ) -> float: - _clear_validate_cache() + clear_validate_cache() for _ in range(warmup): validate( case.instance, diff --git a/docs/contributing.rst b/docs/contributing.rst index 64f9f27..d50cbf6 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -73,7 +73,7 @@ To run all checks on all files, enter: pre-commit run --all-files -Pre-commit check results are also attached to your PR through integration with Github Action. +Pre-commit check results are also attached to your PR through integration with GitHub Actions. 
Performance benchmark ^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/unit/test_shortcut.py b/tests/unit/test_shortcut.py index e8fed51..cb596a4 100644 --- a/tests/unit/test_shortcut.py +++ b/tests/unit/test_shortcut.py @@ -4,6 +4,7 @@ import pytest from jsonschema.exceptions import SchemaError +from jsonschema.exceptions import ValidationError from referencing import Registry from referencing import Resource @@ -141,7 +142,9 @@ def test_validate_skip_schema_check(): validate("foo", schema) if has_ecma_regex(): - with pytest.raises(Exception): + with pytest.raises( + ValidationError, match="is not a valid regular expression" + ): validate("foo", schema, check_schema=False) else: with pytest.raises(re.error): From 25f3a57d4adaf5949b75de868e057cb96b27e5dc Mon Sep 17 00:00:00 2001 From: p1c2u Date: Sun, 1 Mar 2026 21:39:49 +0000 Subject: [PATCH 4/6] Document configuration --- README.rst | 18 +++++++++++++++++- docs/index.rst | 11 +++++++++++ docs/validation.rst | 23 +++++++++++++++++++++-- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 05fb815..6be7f92 100644 --- a/README.rst +++ b/README.rst @@ -54,7 +54,14 @@ Usage .. code-block:: python - validate(instance, schema, cls=OAS32Validator, allow_remote_references=False, **kwargs) + validate( + instance, + schema, + cls=OAS32Validator, + allow_remote_references=False, + check_schema=True, + **kwargs, + ) The first argument is always the value you want to validate. The second argument is always the OpenAPI schema object. @@ -83,6 +90,15 @@ remote ``$ref`` retrieval. To resolve external references, pass an explicit ``registry``. Set ``allow_remote_references=True`` only if you explicitly accept jsonschema's default remote retrieval behavior. +``check_schema`` defaults to ``True`` and validates the schema before +validating an instance. For trusted pre-validated schemas in hot paths, set +``check_schema=False`` to skip schema checking. 
+ +The ``validate`` helper keeps an internal compiled-validator cache. You can +control cache size using the +``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` environment variable +(default: ``128``). + To validate an OpenAPI schema: .. code-block:: python diff --git a/docs/index.rst b/docs/index.rst index 8ab4455..f17a148 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -84,6 +84,17 @@ Usage Read more about the :doc:`validation`. +Configuration +------------- + +Environment variables: + +* ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` + Maximum number of compiled validators kept by the ``validate`` shortcut + cache. Default: ``128``. + +See :doc:`validation` for runtime behavior details. + Related projects ---------------- diff --git a/docs/validation.rst b/docs/validation.rst index 7d3b00d..379df6f 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -10,7 +10,14 @@ Validate .. code-block:: python - validate(instance, schema, cls=OAS32Validator, allow_remote_references=False, **kwargs) + validate( + instance, + schema, + cls=OAS32Validator, + allow_remote_references=False, + check_schema=True, + **kwargs, + ) The first argument is always the value you want to validate. The second argument is always the OpenAPI schema object. @@ -18,6 +25,7 @@ The ``cls`` keyword argument is optional and defaults to ``OAS32Validator``. Use ``cls`` when you need a specific validator version/behavior. The ``allow_remote_references`` keyword argument is optional and defaults to ``False``. +The ``check_schema`` keyword argument is optional and defaults to ``True``. Common forwarded keyword arguments include: - ``registry`` for explicit external reference resolution context @@ -28,6 +36,13 @@ remote ``$ref`` retrieval. Set ``allow_remote_references=True`` only if you explicitly accept jsonschema's default remote retrieval behavior. +For trusted pre-validated schemas in hot paths, set ``check_schema=False`` to +skip schema checking. 
+ +The shortcut keeps an internal compiled-validator cache. +Use ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` to control cache +capacity (default: ``128``). + To validate an OpenAPI schema: .. code-block:: python @@ -171,10 +186,14 @@ Schema errors vs instance errors -------------------------------- The high-level ``validate(...)`` helper checks schema validity before instance -validation, following ``jsonschema.validate(...)`` behavior. +validation by default (``check_schema=True``), following +``jsonschema.validate(...)`` behavior. Malformed schema values (for example an invalid regex in ``pattern``) raise ``SchemaError``. +When ``check_schema=False``, schema checking is skipped and malformed schemas +may instead fail during validation with lower-level errors. + If you instantiate a validator class directly and call ``.validate(...)``, schema checking is not performed automatically, matching ``jsonschema`` validator-class behavior. From 68b2bfd5a372625eca89b13d135fb2fcc59c9f8d Mon Sep 17 00:00:00 2001 From: p1c2u Date: Sun, 1 Mar 2026 21:45:57 +0000 Subject: [PATCH 5/6] Rename env var --- README.rst | 2 +- docs/contributing.rst | 3 ++- docs/index.rst | 2 +- docs/validation.rst | 2 +- openapi_schema_validator/_caches.py | 2 +- openapi_schema_validator/settings.py | 2 +- openapi_schema_validator/shortcuts.py | 4 ++-- tests/unit/test_shortcut.py | 5 ++++- 8 files changed, 13 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 6be7f92..adb3405 100644 --- a/README.rst +++ b/README.rst @@ -96,7 +96,7 @@ validating an instance. For trusted pre-validated schemas in hot paths, set The ``validate`` helper keeps an internal compiled-validator cache. You can control cache size using the -``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` environment variable +``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` environment variable (default: ``128``). 
To validate an OpenAPI schema: diff --git a/docs/contributing.rst b/docs/contributing.rst index d50cbf6..dad3dd4 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -79,7 +79,8 @@ Performance benchmark ^^^^^^^^^^^^^^^^^^^^^ The ``validate`` shortcut uses an internal compiled-validator cache. You can -adjust cache capacity with the ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` +adjust cache capacity with the +``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` environment variable (default: ``128``). To collect a local benchmark report for validation performance, run: diff --git a/docs/index.rst b/docs/index.rst index f17a148..918b8be 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -89,7 +89,7 @@ Configuration Environment variables: -* ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` +* ``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` Maximum number of compiled validators kept by the ``validate`` shortcut cache. Default: ``128``. diff --git a/docs/validation.rst b/docs/validation.rst index 379df6f..f758c06 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -40,7 +40,7 @@ For trusted pre-validated schemas in hot paths, set ``check_schema=False`` to skip schema checking. The shortcut keeps an internal compiled-validator cache. -Use ``OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE`` to control cache +Use ``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` to control cache capacity (default: ``128``). 
To validate an OpenAPI schema: diff --git a/openapi_schema_validator/_caches.py b/openapi_schema_validator/_caches.py index 2d02405..b684226 100644 --- a/openapi_schema_validator/_caches.py +++ b/openapi_schema_validator/_caches.py @@ -102,6 +102,6 @@ def clear(self) -> None: self._cache.clear() def _prune_if_needed(self) -> None: - max_size = get_settings().validate_cache_max_size + max_size = get_settings().compiled_validator_cache_max_size while len(self._cache) > max_size: self._cache.popitem(last=False) diff --git a/openapi_schema_validator/settings.py b/openapi_schema_validator/settings.py index 2926417..44e3843 100644 --- a/openapi_schema_validator/settings.py +++ b/openapi_schema_validator/settings.py @@ -11,7 +11,7 @@ class OpenAPISchemaValidatorSettings(BaseSettings): extra="ignore", ) - validate_cache_max_size: int = Field(default=128, ge=0) + compiled_validator_cache_max_size: int = Field(default=128, ge=0) @lru_cache(maxsize=1) diff --git a/openapi_schema_validator/shortcuts.py b/openapi_schema_validator/shortcuts.py index 9bb5862..496f65b 100644 --- a/openapi_schema_validator/shortcuts.py +++ b/openapi_schema_validator/shortcuts.py @@ -81,10 +81,10 @@ def validate( validator_kwargs.setdefault("registry", _LOCAL_ONLY_REGISTRY) key = _VALIDATOR_CACHE.build_key( - schema=schema, + schema=schema_dict, cls=cls, args=args, - kwargs=kwargs, + kwargs=validator_kwargs, allow_remote_references=allow_remote_references, ) diff --git a/tests/unit/test_shortcut.py b/tests/unit/test_shortcut.py index cb596a4..c37acc2 100644 --- a/tests/unit/test_shortcut.py +++ b/tests/unit/test_shortcut.py @@ -181,7 +181,10 @@ def test_validate_cache_max_size_from_env(monkeypatch): schema_a = {"type": "string"} schema_b = {"type": "integer"} - monkeypatch.setenv("OPENAPI_SCHEMA_VALIDATOR_VALIDATE_CACHE_MAX_SIZE", "1") + monkeypatch.setenv( + "OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE", + "1", + ) reset_settings_cache() with patch( From 
2fd48258945b3b1b36872fb668574714b047c46a Mon Sep 17 00:00:00 2001 From: p1c2u Date: Sun, 1 Mar 2026 22:47:54 +0000 Subject: [PATCH 6/6] Clarify env var lifetime --- README.rst | 2 ++ docs/index.rst | 2 +- docs/validation.rst | 1 + tests/unit/test_settings.py | 24 ++++++++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_settings.py diff --git a/README.rst b/README.rst index adb3405..1a45d9b 100644 --- a/README.rst +++ b/README.rst @@ -98,6 +98,8 @@ The ``validate`` helper keeps an internal compiled-validator cache. You can control cache size using the ``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` environment variable (default: ``128``). +The value is loaded once at first use and reused for the lifetime of the +process. To validate an OpenAPI schema: diff --git a/docs/index.rst b/docs/index.rst index 918b8be..9b2ab0e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -91,7 +91,7 @@ Environment variables: * ``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` Maximum number of compiled validators kept by the ``validate`` shortcut - cache. Default: ``128``. + cache. Default: ``128``. Loaded once at first use. See :doc:`validation` for runtime behavior details. diff --git a/docs/validation.rst b/docs/validation.rst index f758c06..ea48705 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -42,6 +42,7 @@ skip schema checking. The shortcut keeps an internal compiled-validator cache. Use ``OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE`` to control cache capacity (default: ``128``). +The setting is read once at first use and then cached for the process lifetime.
To validate an OpenAPI schema: diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py new file mode 100644 index 0000000..35d4041 --- /dev/null +++ b/tests/unit/test_settings.py @@ -0,0 +1,24 @@ +from openapi_schema_validator.settings import get_settings +from openapi_schema_validator.settings import reset_settings_cache + + +def test_compiled_validator_cache_size_env_is_cached(monkeypatch): + monkeypatch.setenv( + "OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE", + "11", + ) + reset_settings_cache() + + first = get_settings() + assert first.compiled_validator_cache_max_size == 11 + + monkeypatch.setenv( + "OPENAPI_SCHEMA_VALIDATOR_COMPILED_VALIDATOR_CACHE_MAX_SIZE", + "3", + ) + second = get_settings() + assert second.compiled_validator_cache_max_size == 11 + + reset_settings_cache() + third = get_settings() + assert third.compiled_validator_cache_max_size == 3