From 9009c464a40abcc55dbcbe543939eab05a4a13f1 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 11:34:14 +0100 Subject: [PATCH 1/8] feat: pydantic v1 serialization and validation --- pyproject.toml | 2 +- upath/core.py | 75 ++++++++++++++++++------------ upath/tests/test_pydantic.py | 90 ++++++++++++++++++++++++++---------- 3 files changed, 112 insertions(+), 55 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ec243d78..fd40fb56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ tests = [ "pytest-mock >=3.12.0", "pylint >=2.17.4", "mypy >=1.10.0", - "pydantic >=2", + "pydantic >=2,<3", # <3 required for testing pydantic v1 support, not for actual use "pytest-mypy-plugins >=3.1.2", "packaging", ] diff --git a/upath/core.py b/upath/core.py index 4b120614..45fe0a71 100644 --- a/upath/core.py +++ b/upath/core.py @@ -3,40 +3,33 @@ import os import sys import warnings -from abc import ABCMeta -from abc import abstractmethod -from collections.abc import Iterator -from collections.abc import Mapping -from collections.abc import Sequence +from abc import ABCMeta, abstractmethod +from collections.abc import Iterator, Mapping, Sequence from copy import copy from types import MappingProxyType -from typing import IO -from typing import TYPE_CHECKING -from typing import Any -from typing import BinaryIO -from typing import Literal -from typing import TextIO -from typing import overload -from urllib.parse import SplitResult -from urllib.parse import urlsplit +from typing import IO, TYPE_CHECKING, Any, BinaryIO, Callable, Literal, TextIO, overload +from urllib.parse import SplitResult, urlsplit from fsspec.registry import get_filesystem_class from fsspec.spec import AbstractFileSystem -from upath._flavour import LazyFlavourDescriptor -from upath._flavour import upath_get_kwargs_from_url -from upath._flavour import upath_urijoin -from upath._protocol import compatible_protocol -from upath._protocol import get_upath_protocol +from 
upath._flavour import ( + LazyFlavourDescriptor, + upath_get_kwargs_from_url, + upath_urijoin, +) +from upath._protocol import compatible_protocol, get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class -from upath.types import UNSET_DEFAULT -from upath.types import JoinablePathLike -from upath.types import OpenablePath -from upath.types import PathInfo -from upath.types import ReadablePathLike -from upath.types import UPathParser -from upath.types import WritablePathLike +from upath.types import ( + UNSET_DEFAULT, + JoinablePathLike, + OpenablePath, + PathInfo, + ReadablePathLike, + UPathParser, + WritablePathLike, +) if TYPE_CHECKING: if sys.version_info >= (3, 11): @@ -320,9 +313,7 @@ def __new__( obj = object.__new__(upath_cls) obj._protocol = pth_protocol - upath_cls.__init__( - obj, *args, protocol=pth_protocol, **storage_options - ) # type: ignore + upath_cls.__init__(obj, *args, protocol=pth_protocol, **storage_options) # type: ignore else: raise RuntimeError("UPath.__new__ expected cls to be subclass of UPath") @@ -998,3 +989,29 @@ def __get_pydantic_core_schema__( ), serialization=serialization_schema, ) + + @classmethod + def __get_validators__(cls) -> Iterator[Callable]: + yield cls._validate_pydantic_v1 + + @classmethod + def _validate_pydantic_v1(cls, v: Any) -> UPath: + if isinstance(v, str): + return cls(v) + elif isinstance(v, UPath): + return v + elif isinstance(v, dict): + return cls( + path=v.pop("path"), + protocol=v.pop("protocol"), + **v.pop("storage_options"), + ) + else: + raise ValueError(f"Invalid path: {v}") + + def to_dict(self) -> dict[str, Any]: + return { + "path": self.path, + "protocol": self.protocol, + "storage_options": dict(self.storage_options), + } diff --git a/upath/tests/test_pydantic.py b/upath/tests/test_pydantic.py index 383ded5c..6e63339e 100644 --- a/upath/tests/test_pydantic.py +++ b/upath/tests/test_pydantic.py @@ -1,7 +1,9 @@ import json +from functools import partial 
from os.path import abspath import pydantic +import pydantic.v1 as pydantic_v1 import pydantic_core import pytest from fsspec.implementations.http import get_client @@ -9,6 +11,31 @@ from upath import UPath +@pytest.fixture(params=["v1", "v2"]) +def pydantic_version(request): + return request.param + + +@pytest.fixture(params=["json", "python"]) +def source(request): + return request.param + + +@pytest.fixture +def parser(pydantic_version, source): + if pydantic_version == "v1": + if source == "json": + return partial(pydantic_v1.tools.parse_raw_as, type_=UPath) + else: + return partial(pydantic_v1.tools.parse_obj_as, type_=UPath) + else: + ta = pydantic.TypeAdapter(UPath) + if source == "json": + return ta.validate_json + else: + return ta.validate_python + + @pytest.mark.parametrize( "path", [ @@ -19,15 +46,13 @@ "https://www.example.com", ], ) -@pytest.mark.parametrize("source", ["json", "python"]) -def test_validate_from_str(path, source): +def test_validate_from_str(path, source, parser): expected = UPath(path) - ta = pydantic.TypeAdapter(UPath) if source == "json": - actual = ta.validate_json(json.dumps(path)) - else: # source == "python" - actual = ta.validate_python(path) + path = json.dumps(path) + + actual = parser(path) assert abspath(actual.path) == abspath(expected.path) assert actual.protocol == expected.protocol @@ -43,13 +68,13 @@ def test_validate_from_str(path, source): } ], ) -@pytest.mark.parametrize("source", ["json", "python"]) -def test_validate_from_dict(dct, source): - ta = pydantic.TypeAdapter(UPath) +def test_validate_from_dict(dct, source, parser): if source == "json": - output = ta.validate_json(json.dumps(dct)) - else: # source == "python" - output = ta.validate_python(dct) + data = json.dumps(dct) + else: + data = dct + + output = parser(data) assert abspath(output.path) == abspath(dct["path"]) assert output.protocol == dct["protocol"] @@ -66,10 +91,13 @@ def test_validate_from_dict(dct, source): "https://www.example.com", ], ) -def 
test_validate_from_instance(path): +def test_validate_from_instance(path, pydantic_version): input = UPath(path) - output = pydantic.TypeAdapter(UPath).validate_python(input) + if pydantic_version == "v1": + output = pydantic_v1.tools.parse_obj_as(UPath, input) + else: + output = pydantic.TypeAdapter(UPath).validate_python(input) assert output is input @@ -88,26 +116,38 @@ def test_validate_from_instance(path): ], ) @pytest.mark.parametrize("mode", ["json", "python"]) -def test_dump(args, kwargs, mode): +def test_dump(args, kwargs, mode, pydantic_version): u = UPath(*args, **kwargs) - output = pydantic.TypeAdapter(UPath).dump_python(u, mode=mode) + if pydantic_version == "v1": + output = u.to_json_serializable() if mode == "json" else u.to_dict() + else: + output = pydantic.TypeAdapter(UPath).dump_python(u, mode=mode) assert output["path"] == u.path assert output["protocol"] == u.protocol assert output["storage_options"] == u.storage_options -def test_dump_non_serializable_python(): - output = pydantic.TypeAdapter(UPath).dump_python( - UPath("https://www.example.com", get_client=get_client), mode="python" - ) +def test_dump_non_serializable_python(pydantic_version): + upath = UPath("https://www.example.com", get_client=get_client) + + if pydantic_version == "v1": + output = upath.to_dict() + else: + output = pydantic.TypeAdapter(UPath).dump_python(upath, mode="python") assert output["storage_options"]["get_client"] is get_client -def test_dump_non_serializable_json(): - with pytest.raises(pydantic_core.PydanticSerializationError, match="unknown type"): - pydantic.TypeAdapter(UPath).dump_python( - UPath("https://www.example.com", get_client=get_client), mode="json" - ) +def test_dump_non_serializable_json(pydantic_version): + upath = UPath("https://www.example.com", get_client=get_client) + + if pydantic_version == "v1": + with pytest.raises(TypeError, match="not JSON serializable"): + json.dumps(upath.to_dict()) + else: + with pytest.raises( + 
pydantic_core.PydanticSerializationError, match="unknown type" + ): + pydantic.TypeAdapter(UPath).dump_python(upath, mode="json") From b99e933685fe2b33fbc599e283c0bafff388e22a Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 13:07:17 +0100 Subject: [PATCH 2/8] feat: simplify validation using '._validate' --- upath/core.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/upath/core.py b/upath/core.py index 45fe0a71..95667eb0 100644 --- a/upath/core.py +++ b/upath/core.py @@ -937,9 +937,7 @@ def __get_pydantic_core_schema__( deserialization_schema = core_schema.chain_schema( [ - core_schema.no_info_plain_validator_function( - lambda v: {"path": v} if isinstance(v, str) else v, - ), + core_schema.no_info_plain_validator_function(cls._validate), core_schema.typed_dict_schema( { "path": core_schema.typed_dict_field( @@ -964,13 +962,6 @@ def __get_pydantic_core_schema__( }, extra_behavior="forbid", ), - core_schema.no_info_plain_validator_function( - lambda dct: cls( - dct.pop("path"), - protocol=dct.pop("protocol"), - **dct["storage_options"], - ) - ), ] ) @@ -992,19 +983,19 @@ def __get_pydantic_core_schema__( @classmethod def __get_validators__(cls) -> Iterator[Callable]: - yield cls._validate_pydantic_v1 + yield cls._validate @classmethod - def _validate_pydantic_v1(cls, v: Any) -> UPath: + def _validate(cls, v: Any) -> UPath: if isinstance(v, str): return cls(v) elif isinstance(v, UPath): return v elif isinstance(v, dict): return cls( - path=v.pop("path"), - protocol=v.pop("protocol"), - **v.pop("storage_options"), + path=v["path"], + protocol=v.get("protocol", ""), + **v.get("storage_options", {}), ) else: raise ValueError(f"Invalid path: {v}") From 09a5f76c56b43bb989d3e3b8ecddc81666d0a364 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 13:36:09 +0100 Subject: [PATCH 3/8] fix: linting --- upath/core.py | 50 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 30 
insertions(+), 20 deletions(-) diff --git a/upath/core.py b/upath/core.py index 95667eb0..03f2e2fb 100644 --- a/upath/core.py +++ b/upath/core.py @@ -3,33 +3,41 @@ import os import sys import warnings -from abc import ABCMeta, abstractmethod -from collections.abc import Iterator, Mapping, Sequence +from abc import ABCMeta +from abc import abstractmethod +from collections.abc import Iterator +from collections.abc import Mapping +from collections.abc import Sequence from copy import copy from types import MappingProxyType -from typing import IO, TYPE_CHECKING, Any, BinaryIO, Callable, Literal, TextIO, overload -from urllib.parse import SplitResult, urlsplit +from typing import IO +from typing import TYPE_CHECKING +from typing import Any +from typing import BinaryIO +from typing import Callable +from typing import Literal +from typing import TextIO +from typing import overload +from urllib.parse import SplitResult +from urllib.parse import urlsplit from fsspec.registry import get_filesystem_class from fsspec.spec import AbstractFileSystem -from upath._flavour import ( - LazyFlavourDescriptor, - upath_get_kwargs_from_url, - upath_urijoin, -) -from upath._protocol import compatible_protocol, get_upath_protocol +from upath._flavour import LazyFlavourDescriptor +from upath._flavour import upath_get_kwargs_from_url +from upath._flavour import upath_urijoin +from upath._protocol import compatible_protocol +from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class -from upath.types import ( - UNSET_DEFAULT, - JoinablePathLike, - OpenablePath, - PathInfo, - ReadablePathLike, - UPathParser, - WritablePathLike, -) +from upath.types import UNSET_DEFAULT +from upath.types import JoinablePathLike +from upath.types import OpenablePath +from upath.types import PathInfo +from upath.types import ReadablePathLike +from upath.types import UPathParser +from upath.types import WritablePathLike if TYPE_CHECKING: if 
sys.version_info >= (3, 11): @@ -313,7 +321,9 @@ def __new__( obj = object.__new__(upath_cls) obj._protocol = pth_protocol - upath_cls.__init__(obj, *args, protocol=pth_protocol, **storage_options) # type: ignore + upath_cls.__init__( + obj, *args, protocol=pth_protocol, **storage_options + ) # type: ignore else: raise RuntimeError("UPath.__new__ expected cls to be subclass of UPath") From c6ffee7843077b814a5c8a39273cd4b092ce9aa8 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 13:36:14 +0100 Subject: [PATCH 4/8] fix: tests --- upath/tests/test_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upath/tests/test_pydantic.py b/upath/tests/test_pydantic.py index 6e63339e..ebb71d6c 100644 --- a/upath/tests/test_pydantic.py +++ b/upath/tests/test_pydantic.py @@ -120,7 +120,7 @@ def test_dump(args, kwargs, mode, pydantic_version): u = UPath(*args, **kwargs) if pydantic_version == "v1": - output = u.to_json_serializable() if mode == "json" else u.to_dict() + output = u.to_dict() else: output = pydantic.TypeAdapter(UPath).dump_python(u, mode=mode) From 03f7d9a738d47abeffe00652d683d0e194d8e308 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 15:19:17 +0100 Subject: [PATCH 5/8] feat: include 'to_dict' on 'UPathMixin', fix validation schema, add 'SerializedUPath' typed dict --- upath/core.py | 71 +++++++++++++++++++++++++----------- upath/tests/test_pydantic.py | 4 +- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/upath/core.py b/upath/core.py index 03f2e2fb..f39a8c91 100644 --- a/upath/core.py +++ b/upath/core.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import pathlib import sys import warnings from abc import ABCMeta @@ -11,12 +12,15 @@ from copy import copy from types import MappingProxyType from typing import IO -from typing import TYPE_CHECKING -from typing import Any +from typing import TYPE_CHECKING, Any from typing import BinaryIO from typing import Callable from 
typing import Literal +from typing import NotRequired from typing import TextIO +from typing import TypeAlias +from typing import TypedDict +from typing import Union from typing import overload from urllib.parse import SplitResult from urllib.parse import urlsplit @@ -27,8 +31,7 @@ from upath._flavour import LazyFlavourDescriptor from upath._flavour import upath_get_kwargs_from_url from upath._flavour import upath_urijoin -from upath._protocol import compatible_protocol -from upath._protocol import get_upath_protocol +from upath._protocol import compatible_protocol, get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class from upath.types import UNSET_DEFAULT @@ -108,6 +111,18 @@ def __getitem__(cls, key): return cls +class SerializedUPath(TypedDict): + """Serialized format for a UPath object""" + + path: str + protocol: NotRequired[str] + storage_options: NotRequired[dict[str, Any]] + + +# a pathlike object that can be turned into a UPath +_PathLike: TypeAlias = Union[str, pathlib.Path, "UPath", SerializedUPath] + + class _UPathMixin(metaclass=_UPathMeta): __slots__ = () @@ -180,6 +195,13 @@ def path(self) -> str: """The path that a fsspec filesystem can use.""" return self.parser.strip_protocol(self.__str__()) + def to_dict(self) -> SerializedUPath: + return { + "path": self.path, + "protocol": self.protocol, + "storage_options": dict(self.storage_options), + } + def joinuri(self, uri: JoinablePathLike) -> UPath: """Join with urljoin behavior for UPath instances""" # short circuit if the new uri uses a different protocol @@ -947,7 +969,7 @@ def __get_pydantic_core_schema__( deserialization_schema = core_schema.chain_schema( [ - core_schema.no_info_plain_validator_function(cls._validate), + core_schema.no_info_plain_validator_function(cls._to_serialized_format), core_schema.typed_dict_schema( { "path": core_schema.typed_dict_field( @@ -972,6 +994,7 @@ def __get_pydantic_core_schema__( }, extra_behavior="forbid", ), + 
core_schema.no_info_plain_validator_function(cls._validate), ] ) @@ -995,24 +1018,28 @@ def __get_pydantic_core_schema__( def __get_validators__(cls) -> Iterator[Callable]: yield cls._validate + @staticmethod + def _to_serialized_format(v: _PathLike) -> SerializedUPath: + if isinstance(v, UPath): + return v.to_dict() + if isinstance(v, dict): + return v + if isinstance(v, pathlib.Path): + return {"path": v.as_posix(), "protocol": ""} + if isinstance(v, str): + return { + "path": v, + } + raise TypeError(f"Invalid path: {v}") + @classmethod def _validate(cls, v: Any) -> UPath: - if isinstance(v, str): - return cls(v) - elif isinstance(v, UPath): - return v - elif isinstance(v, dict): + if not isinstance(v, UPath): + v = cls._to_serialized_format(v) + return cls( - path=v["path"], - protocol=v.get("protocol", ""), - **v.get("storage_options", {}), + v["path"], + protocol=v.get("protocol"), + **v.get("storage_options", {}), # type: ignore[arg-type] ) - else: - raise ValueError(f"Invalid path: {v}") - - def to_dict(self) -> dict[str, Any]: - return { - "path": self.path, - "protocol": self.protocol, - "storage_options": dict(self.storage_options), - } + return v diff --git a/upath/tests/test_pydantic.py b/upath/tests/test_pydantic.py index ebb71d6c..2a52f9c6 100644 --- a/upath/tests/test_pydantic.py +++ b/upath/tests/test_pydantic.py @@ -25,9 +25,9 @@ def source(request): def parser(pydantic_version, source): if pydantic_version == "v1": if source == "json": - return partial(pydantic_v1.tools.parse_raw_as, type_=UPath) + return lambda x: pydantic_v1.tools.parse_raw_as(UPath, x) else: - return partial(pydantic_v1.tools.parse_obj_as, type_=UPath) + return lambda x: pydantic_v1.tools.parse_obj_as(UPath, x) else: ta = pydantic.TypeAdapter(UPath) if source == "json": From 52e2a1849e9a9e20bd8724d97450984533cb7640 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 17:16:44 +0100 Subject: [PATCH 6/8] fix: typing --- upath/core.py | 23 ++++++++++++----------- 1 
file changed, 12 insertions(+), 11 deletions(-) diff --git a/upath/core.py b/upath/core.py index f39a8c91..4972eb95 100644 --- a/upath/core.py +++ b/upath/core.py @@ -16,11 +16,8 @@ from typing import BinaryIO from typing import Callable from typing import Literal -from typing import NotRequired from typing import TextIO -from typing import TypeAlias from typing import TypedDict -from typing import Union from typing import overload from urllib.parse import SplitResult from urllib.parse import urlsplit @@ -45,8 +42,10 @@ if TYPE_CHECKING: if sys.version_info >= (3, 11): from typing import Self + from typing import NotRequired else: from typing_extensions import Self + from typing_extensions import NotRequired from pydantic import GetCoreSchemaHandler from pydantic_core.core_schema import CoreSchema @@ -119,10 +118,6 @@ class SerializedUPath(TypedDict): storage_options: NotRequired[dict[str, Any]] -# a pathlike object that can be turned into a UPath -_PathLike: TypeAlias = Union[str, pathlib.Path, "UPath", SerializedUPath] - - class _UPathMixin(metaclass=_UPathMeta): __slots__ = () @@ -1019,18 +1014,24 @@ def __get_validators__(cls) -> Iterator[Callable]: yield cls._validate @staticmethod - def _to_serialized_format(v: _PathLike) -> SerializedUPath: - if isinstance(v, UPath): + def _to_serialized_format( + v: str | pathlib.Path | _UPathMixin | dict[str, Any] + ) -> SerializedUPath: + if isinstance(v, _UPathMixin): return v.to_dict() if isinstance(v, dict): - return v + return { + "path": v["path"], + "protocol": v.get("protocol", ""), + "storage_options": v.get("storage_options", {}), + } if isinstance(v, pathlib.Path): return {"path": v.as_posix(), "protocol": ""} if isinstance(v, str): return { "path": v, } - raise TypeError(f"Invalid path: {v}") + raise TypeError(f"Invalid path: {v!r}") @classmethod def _validate(cls, v: Any) -> UPath: From ea1e65e6877a1f046be9d5b2bea9220c33d271f5 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Tue, 26 Aug 2025 22:14:31 +0100 
Subject: [PATCH 7/8] fix: linting --- upath/core.py | 10 ++++++---- upath/tests/test_pydantic.py | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/upath/core.py b/upath/core.py index 4972eb95..25ca6d21 100644 --- a/upath/core.py +++ b/upath/core.py @@ -12,7 +12,8 @@ from copy import copy from types import MappingProxyType from typing import IO -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING +from typing import Any from typing import BinaryIO from typing import Callable from typing import Literal @@ -28,7 +29,8 @@ from upath._flavour import LazyFlavourDescriptor from upath._flavour import upath_get_kwargs_from_url from upath._flavour import upath_urijoin -from upath._protocol import compatible_protocol, get_upath_protocol +from upath._protocol import compatible_protocol +from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class from upath.types import UNSET_DEFAULT @@ -41,11 +43,11 @@ if TYPE_CHECKING: if sys.version_info >= (3, 11): - from typing import Self from typing import NotRequired + from typing import Self else: - from typing_extensions import Self from typing_extensions import NotRequired + from typing_extensions import Self from pydantic import GetCoreSchemaHandler from pydantic_core.core_schema import CoreSchema diff --git a/upath/tests/test_pydantic.py b/upath/tests/test_pydantic.py index 2a52f9c6..e7928a22 100644 --- a/upath/tests/test_pydantic.py +++ b/upath/tests/test_pydantic.py @@ -1,5 +1,4 @@ import json -from functools import partial from os.path import abspath import pydantic From 4158fdd814f6e89757f8c9a32519ca8bbf415e67 Mon Sep 17 00:00:00 2001 From: Dan Woodward Date: Wed, 27 Aug 2025 09:59:42 +0100 Subject: [PATCH 8/8] fix: linting, pydantic v3 pin in tests --- pyproject.toml | 2 +- upath/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fd40fb56..6a5d6cac 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ tests = [ "pytest-mock >=3.12.0", "pylint >=2.17.4", "mypy >=1.10.0", - "pydantic >=2,<3", # <3 required for testing pydantic v1 support, not for actual use + "pydantic >=2", # tests exercise pydantic v1 support via the pydantic.v1 namespace; not needed for actual use "pytest-mypy-plugins >=3.1.2", "packaging", ] diff --git a/upath/core.py b/upath/core.py index 25ca6d21..ef441ebc 100644 --- a/upath/core.py +++ b/upath/core.py @@ -1017,7 +1017,7 @@ def __get_validators__(cls) -> Iterator[Callable]: @staticmethod def _to_serialized_format( - v: str | pathlib.Path | _UPathMixin | dict[str, Any] + v: str | pathlib.Path | _UPathMixin | dict[str, Any], ) -> SerializedUPath: if isinstance(v, _UPathMixin): return v.to_dict()