From b83105437852dfebdaaf3be609eb98e57f7fd3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20Gon=C3=A7alves?= Date: Sun, 24 Nov 2024 11:17:26 +0000 Subject: [PATCH 1/4] feat(stage): create an abstraction for FileLocation --- setup.cfg | 1 + singlestoredb/management/files.py | 465 ++++++++++++++++++++++++++ singlestoredb/management/workspace.py | 364 ++------------------ 3 files changed, 494 insertions(+), 336 deletions(-) create mode 100644 singlestoredb/management/files.py diff --git a/setup.cfg b/setup.cfg index ff73c4f46..cadc590e4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,6 +70,7 @@ sqlalchemy = *.typed *.sql *.csv + *.ipynb [tool:pytest] markers = diff --git a/singlestoredb/management/files.py b/singlestoredb/management/files.py new file mode 100644 index 000000000..1f60592a3 --- /dev/null +++ b/singlestoredb/management/files.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python +"""SingleStore Cloud Files Management.""" +from __future__ import annotations + +import datetime +import io +import os +import re +from abc import ABC +from abc import abstractmethod +from typing import Any +from typing import BinaryIO +from typing import Dict +from typing import List +from typing import Optional +from typing import TextIO +from typing import Union + +from ..exceptions import ManagementError +from .utils import PathLike +from .utils import to_datetime +from .utils import vars_to_str + + +class FilesObject(object): + """ + File / folder object. + + It can belong to either a workspace stage or personal/shared space. + + This object is not instantiated directly. It is used in the results + of various operations in ``WorkspaceGroup.stage`` methods. + + """ + + def __init__( + self, + name: str, + path: str, + size: int, + type: str, + format: str, + mimetype: str, + created: Optional[datetime.datetime], + last_modified: Optional[datetime.datetime], + writable: bool, + content: Optional[List[str]] = None, + ): + #: Name of file / folder + self.name = name + + if type == 'directory': + path = re.sub(r'/*$', r'', str(path)) + '/' + + #: Path of file / folder + self.path = path + + #: Size of the object (in bytes) + self.size = size + + #: Data type: file or directory + self.type = type + + #: Data format + self.format = format + + #: Mime type + self.mimetype = mimetype + + #: Datetime the object was created + self.created_at = created + + #: Datetime the object was modified last + self.last_modified_at = last_modified + + #: Is the object writable? + self.writable = writable + + #: Contents of a directory + self.content: List[str] = content or [] + + self._location: Optional[FileLocation] = None + + @classmethod + def from_dict( + cls, + obj: Dict[str, Any], + location: FileLocation, + ) -> FilesObject: + """ + Construct a FilesObject from a dictionary of values. + + Parameters + ---------- + obj : dict + Dictionary of values + location : FileLocation + FileLocation object to use as the parent + + Returns + ------- + :class:`FilesObject` + + """ + out = cls( + name=obj['name'], + path=obj['path'], + size=obj['size'], + type=obj['type'], + format=obj['format'], + mimetype=obj['mimetype'], + created=to_datetime(obj.get('created')), + last_modified=to_datetime(obj.get('last_modified')), + writable=bool(obj['writable']), + ) + out._location = location + return out + + def __str__(self) -> str: + """Return string representation.""" + return vars_to_str(self) + + def __repr__(self) -> str: + """Return string representation.""" + return str(self) + + def open( + self, + mode: str = 'r', + encoding: Optional[str] = None, + ) -> Union[io.StringIO, io.BytesIO]: + """ + Open a file path for reading or writing. + + Parameters + ---------- + mode : str, optional + The read / write mode. The following modes are supported: + * 'r' open for reading (default) + * 'w' open for writing, truncating the file first + * 'x' create a new file and open it for writing + The data type can be specified by adding one of the following: + * 'b' binary mode + * 't' text mode (default) + encoding : str, optional + The string encoding to use for text + + Returns + ------- + FilesObjectBytesReader - 'rb' or 'b' mode + FilesObjectBytesWriter - 'wb' or 'xb' mode + FilesObjectTextReader - 'r' or 'rt' mode + FilesObjectTextWriter - 'w', 'x', 'wt' or 'xt' mode + + """ + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + + if self.is_dir(): + raise IsADirectoryError( + f'directories can not be read or written: {self.path}', + ) + + return self._location.open(self.path, mode=mode, encoding=encoding) + + def download( + self, + local_path: Optional[PathLike] = None, + *, + overwrite: bool = False, + encoding: Optional[str] = None, + ) -> Optional[Union[bytes, str]]: + """ + Download the content of a file path. + + Parameters + ---------- + local_path : Path or str + Path to local file target location + overwrite : bool, optional + Should an existing file be overwritten if it exists? + encoding : str, optional + Encoding used to convert the resulting data + + Returns + ------- + bytes or str or None + + """ + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + + return self._location.download_file( + self.path, local_path=local_path, + overwrite=overwrite, encoding=encoding, + ) + + download_file = download + + def remove(self) -> None: + """Delete the file.""" + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + + if self.type == 'directory': + raise IsADirectoryError( + f'path is a directory; use rmdir or removedirs {self.path}', + ) + + self._location.remove(self.path) + + def rmdir(self) -> None: + """Delete the empty directory.""" + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + + if self.type != 'directory': + raise NotADirectoryError( + f'path is not a directory: {self.path}', + ) + + self._location.rmdir(self.path) + + def removedirs(self) -> None: + """Delete the directory recursively.""" + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + + if self.type != 'directory': + raise NotADirectoryError( + f'path is not a directory: {self.path}', + ) + + self._location.removedirs(self.path) + + def rename(self, new_path: PathLike, *, overwrite: bool = False) -> None: + """ + Move the file to a new location. + + Parameters + ---------- + new_path : Path or str + The new location of the file + overwrite : bool, optional + Should path be overwritten if it already exists? + + """ + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + out = self._location.rename(self.path, new_path, overwrite=overwrite) + self.name = out.name + self.path = out.path + return None + + def exists(self) -> bool: + """Does the file / folder exist?""" + if self._location is None: + raise ManagementError( + msg='No FileLocation object is associated with this object.', + ) + return self._location.exists(self.path) + + def is_dir(self) -> bool: + """Is the object a directory?""" + return self.type == 'directory' + + def is_file(self) -> bool: + """Is the object a file?""" + return self.type != 'directory' + + def abspath(self) -> str: + """Return the full path of the object.""" + return str(self.path) + + def basename(self) -> str: + """Return the basename of the object.""" + return self.name + + def dirname(self) -> str: + """Return the directory name of the object.""" + return re.sub(r'/*$', r'', os.path.dirname(re.sub(r'/*$', r'', self.path))) + '/' + + def getmtime(self) -> float: + """Return the last modified datetime as a UNIX timestamp.""" + if self.last_modified_at is None: + return 0.0 + return self.last_modified_at.timestamp() + + def getctime(self) -> float: + """Return the creation datetime as a UNIX timestamp.""" + if self.created_at is None: + return 0.0 + return self.created_at.timestamp() + + +class FilesObjectTextWriter(io.StringIO): + """StringIO wrapper for writing to FileLocation.""" + + def __init__(self, buffer: Optional[str], location: FileLocation, path: PathLike): + self._location = location + self._path = path + super().__init__(buffer) + + def close(self) -> None: + """Write the content to the path.""" + self._location._upload(self.getvalue(), self._path) + super().close() + + +class FilesObjectTextReader(io.StringIO): + """StringIO wrapper for reading from FileLocation.""" + + +class FilesObjectBytesWriter(io.BytesIO): + """BytesIO wrapper for writing to FileLocation.""" + + def __init__(self, buffer: bytes, location: FileLocation, path: PathLike): + self._location = location + self._path = path + super().__init__(buffer) + + def close(self) -> None: + """Write the content to the file path.""" + self._location._upload(self.getvalue(), self._path) + super().close() + + +class FilesObjectBytesReader(io.BytesIO): + """BytesIO wrapper for reading from FileLocation.""" + + +class FileLocation(ABC): + @abstractmethod + def open( + self, + path: PathLike, + mode: str = 'r', + encoding: Optional[str] = None, + ) -> Union[io.StringIO, io.BytesIO]: + pass + + @abstractmethod + def upload_file( + self, + local_path: Union[PathLike, TextIO, BinaryIO], + path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + pass + + @abstractmethod + def upload_folder( + self, + local_path: PathLike, + path: PathLike, + *, + overwrite: bool = False, + recursive: bool = True, + include_root: bool = False, + ignore: Optional[Union[PathLike, List[PathLike]]] = None, + ) -> FilesObject: + pass + + @abstractmethod + def _upload( + self, + content: Union[str, bytes, TextIO, BinaryIO], + path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + pass + + @abstractmethod + def mkdir(self, path: PathLike, overwrite: bool = False) -> FilesObject: + pass + + @abstractmethod + def rename( + self, + old_path: PathLike, + new_path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + pass + + @abstractmethod + def info(self, path: PathLike) -> FilesObject: + pass + + @abstractmethod + def exists(self, path: PathLike) -> bool: + pass + + @abstractmethod + def is_dir(self, path: PathLike) -> bool: + pass + + @abstractmethod + def is_file(self, path: PathLike) -> bool: + pass + + @abstractmethod + def listdir( + self, + path: PathLike = '/', + *, + recursive: bool = False, + ) -> List[str]: + pass + + @abstractmethod + def download_file( + self, + path: PathLike, + local_path: Optional[PathLike] = None, + *, + overwrite: bool = False, + encoding: Optional[str] = None, + ) -> Optional[Union[bytes, str]]: + pass + + @abstractmethod + def download_folder( + self, + path: PathLike, + local_path: PathLike = '.', + *, + overwrite: bool = False, + ) -> None: + pass + + @abstractmethod + def remove(self, path: PathLike) -> None: + pass + + @abstractmethod + def removedirs(self, path: PathLike) -> None: + pass + + @abstractmethod + def rmdir(self, path: PathLike) -> None: + pass + + @abstractmethod + def __str__(self) -> str: + pass + + @abstractmethod + def __repr__(self) -> str: + pass diff --git a/singlestoredb/management/workspace.py b/singlestoredb/management/workspace.py index 7ff686281..795a5128e 100644 --- a/singlestoredb/management/workspace.py +++ b/singlestoredb/management/workspace.py @@ -21,6 +21,12 @@ from .. import connection from ..exceptions import ManagementError from .billing_usage import BillingUsageItem +from .files import FileLocation +from .files import FilesObject +from .files import FilesObjectBytesReader +from .files import FilesObjectBytesWriter +from .files import FilesObjectTextReader +from .files import FilesObjectTextWriter from .manager import Manager from .organization import Organization from .region import Region @@ -84,324 +90,7 @@ def get_workspace( raise RuntimeError('no workspace group specified') -class StageObject(object): - """ - Stage file / folder object. - - This object is not instantiated directly. It is used in the results - of various operations in ``WorkspaceGroup.stage`` methods. - - """ - - def __init__( - self, - name: str, - path: str, - size: int, - type: str, - format: str, - mimetype: str, - created: Optional[datetime.datetime], - last_modified: Optional[datetime.datetime], - writable: bool, - content: Optional[List[str]] = None, - ): - #: Name of file / folder - self.name = name - - if type == 'directory': - path = re.sub(r'/*$', r'', str(path)) + '/' - - #: Path of file / folder - self.path = path - - #: Size of the object (in bytes) - self.size = size - - #: Data type: file or directory - self.type = type - - #: Data format - self.format = format - - #: Mime type - self.mimetype = mimetype - - #: Datetime the object was created - self.created_at = created - - #: Datetime the object was modified last - self.last_modified_at = last_modified - - #: Is the object writable? - self.writable = writable - - #: Contents of a directory - self.content: List[str] = content or [] - - self._stage: Optional[Stage] = None - - @classmethod - def from_dict( - cls, - obj: Dict[str, Any], - stage: Stage, - ) -> StageObject: - """ - Construct a StageObject from a dictionary of values. - - Parameters - ---------- - obj : dict - Dictionary of values - stage : Stage - Stage object to use as the parent - - Returns - ------- - :class:`StageObject` - - """ - out = cls( - name=obj['name'], - path=obj['path'], - size=obj['size'], - type=obj['type'], - format=obj['format'], - mimetype=obj['mimetype'], - created=to_datetime(obj.get('created')), - last_modified=to_datetime(obj.get('last_modified')), - writable=bool(obj['writable']), - ) - out._stage = stage - return out - - def __str__(self) -> str: - """Return string representation.""" - return vars_to_str(self) - - def __repr__(self) -> str: - """Return string representation.""" - return str(self) - - def open( - self, - mode: str = 'r', - encoding: Optional[str] = None, - ) -> Union[io.StringIO, io.BytesIO]: - """ - Open a Stage path for reading or writing. - - Parameters - ---------- - mode : str, optional - The read / write mode. The following modes are supported: - * 'r' open for reading (default) - * 'w' open for writing, truncating the file first - * 'x' create a new file and open it for writing - The data type can be specified by adding one of the following: - * 'b' binary mode - * 't' text mode (default) - encoding : str, optional - The string encoding to use for text - - Returns - ------- - StageObjectBytesReader - 'rb' or 'b' mode - StageObjectBytesWriter - 'wb' or 'xb' mode - StageObjectTextReader - 'r' or 'rt' mode - StageObjectTextWriter - 'w', 'x', 'wt' or 'xt' mode - - """ - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - - if self.is_dir(): - raise IsADirectoryError( - f'directories can not be read or written: {self.path}', - ) - - return self._stage.open(self.path, mode=mode, encoding=encoding) - - def download( - self, - local_path: Optional[PathLike] = None, - *, - overwrite: bool = False, - encoding: Optional[str] = None, - ) -> Optional[Union[bytes, str]]: - """ - Download the content of a stage path. - - Parameters - ---------- - local_path : Path or str - Path to local file target location - overwrite : bool, optional - Should an existing file be overwritten if it exists? - encoding : str, optional - Encoding used to convert the resulting data - - Returns - ------- - bytes or str or None - - """ - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - - return self._stage.download_file( - self.path, local_path=local_path, - overwrite=overwrite, encoding=encoding, - ) - - download_file = download - - def remove(self) -> None: - """Delete the stage file.""" - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - - if self.type == 'directory': - raise IsADirectoryError( - f'path is a directory; use rmdir or removedirs {self.path}', - ) - - self._stage.remove(self.path) - - def rmdir(self) -> None: - """Delete the empty stage directory.""" - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - - if self.type != 'directory': - raise NotADirectoryError( - f'path is not a directory: {self.path}', - ) - - self._stage.rmdir(self.path) - - def removedirs(self) -> None: - """Delete the stage directory recursively.""" - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - - if self.type != 'directory': - raise NotADirectoryError( - f'path is not a directory: {self.path}', - ) - - self._stage.removedirs(self.path) - - def rename(self, new_path: PathLike, *, overwrite: bool = False) -> None: - """ - Move the stage file to a new location. - - Parameters - ---------- - new_path : Path or str - The new location of the file - overwrite : bool, optional - Should path be overwritten if it already exists? - - """ - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - out = self._stage.rename(self.path, new_path, overwrite=overwrite) - self.name = out.name - self.path = out.path - return None - - def exists(self) -> bool: - """Does the file / folder exist?""" - if self._stage is None: - raise ManagementError( - msg='No Stage object is associated with this object.', - ) - return self._stage.exists(self.path) - - def is_dir(self) -> bool: - """Is the stage object a directory?""" - return self.type == 'directory' - - def is_file(self) -> bool: - """Is the stage object a file?""" - return self.type != 'directory' - - def abspath(self) -> str: - """Return the full path of the object.""" - return str(self.path) - - def basename(self) -> str: - """Return the basename of the object.""" - return self.name - - def dirname(self) -> str: - """Return the directory name of the object.""" - return re.sub(r'/*$', r'', os.path.dirname(re.sub(r'/*$', r'', self.path))) + '/' - - def getmtime(self) -> float: - """Return the last modified datetime as a UNIX timestamp.""" - if self.last_modified_at is None: - return 0.0 - return self.last_modified_at.timestamp() - - def getctime(self) -> float: - """Return the creation datetime as a UNIX timestamp.""" - if self.created_at is None: - return 0.0 - return self.created_at.timestamp() - - -class StageObjectTextWriter(io.StringIO): - """StringIO wrapper for writing to Stage.""" - - def __init__(self, buffer: Optional[str], stage: Stage, stage_path: PathLike): - self._stage = stage - self._stage_path = stage_path - super().__init__(buffer) - - def close(self) -> None: - """Write the content to the stage path.""" - self._stage._upload(self.getvalue(), self._stage_path) - super().close() - - -class StageObjectTextReader(io.StringIO): - """StringIO wrapper for reading from Stage.""" - - -class StageObjectBytesWriter(io.BytesIO): - """BytesIO wrapper for writing to Stage.""" - - def __init__(self, buffer: bytes, stage: Stage, stage_path: PathLike): - self._stage = stage - self._stage_path = stage_path - super().__init__(buffer) - - def close(self) -> None: - """Write the content to the stage path.""" - self._stage._upload(self.getvalue(), self._stage_path) - super().close() - - -class StageObjectBytesReader(io.BytesIO): - """BytesIO wrapper for reading from Stage.""" - - -class Stage(object): +class Stage(FileLocation): """ Stage manager. @@ -440,10 +129,10 @@ def open( Returns ------- - StageObjectBytesReader - 'rb' or 'b' mode - StageObjectBytesWriter - 'wb' or 'xb' mode - StageObjectTextReader - 'r' or 'rt' mode - StageObjectTextWriter - 'w', 'x', 'wt' or 'xt' mode + FilesObjectBytesReader - 'rb' or 'b' mode + FilesObjectBytesWriter - 'wb' or 'xb' mode + FilesObjectTextReader - 'r' or 'rt' mode + FilesObjectTextWriter - 'w', 'x', 'wt' or 'xt' mode """ if '+' in mode or 'a' in mode: @@ -456,19 +145,19 @@ def open( raise FileExistsError(f'stage path already exists: {stage_path}') self.remove(stage_path) if 'b' in mode: - return StageObjectBytesWriter(b'', self, stage_path) - return StageObjectTextWriter('', self, stage_path) + return FilesObjectBytesWriter(b'', self, stage_path) + return FilesObjectTextWriter('', self, stage_path) if 'r' in mode: content = self.download_file(stage_path) if isinstance(content, bytes): if 'b' in mode: - return StageObjectBytesReader(content) + return FilesObjectBytesReader(content) encoding = 'utf-8' if encoding is None else encoding - return StageObjectTextReader(content.decode(encoding)) + return FilesObjectTextReader(content.decode(encoding)) if isinstance(content, str): - return StageObjectTextReader(content) + return FilesObjectTextReader(content) raise ValueError(f'unrecognized file content type: {type(content)}') @@ -480,7 +169,7 @@ def upload_file( stage_path: PathLike, *, overwrite: bool = False, - ) -> StageObject: + ) -> FilesObject: """ Upload a local file. @@ -518,7 +207,7 @@ def upload_folder( recursive: bool = True, include_root: bool = False, ignore: Optional[Union[PathLike, List[PathLike]]] = None, - ) -> StageObject: + ) -> FilesObject: """ Upload a folder recursively. @@ -573,7 +262,7 @@ def _upload( stage_path: PathLike, *, overwrite: bool = False, - ) -> StageObject: + ) -> FilesObject: """ Upload content to a stage file. @@ -600,7 +289,7 @@ def _upload( return self.info(stage_path) - def mkdir(self, stage_path: PathLike, overwrite: bool = False) -> StageObject: + def mkdir(self, stage_path: PathLike, overwrite: bool = False) -> FilesObject: """ Make a directory in the stage. @@ -613,7 +302,7 @@ def mkdir(self, stage_path: PathLike, overwrite: bool = False) -> StageObject: Returns ------- - StageObject + FilesObject """ stage_path = re.sub(r'/*$', r'', str(stage_path)) + '/' @@ -638,7 +327,7 @@ def rename( new_path: PathLike, *, overwrite: bool = False, - ) -> StageObject: + ) -> FilesObject: """ Move the stage file to a new location. @@ -674,7 +363,7 @@ def rename( return self.info(new_path) - def info(self, stage_path: PathLike) -> StageObject: + def info(self, stage_path: PathLike) -> FilesObject: """ Return information about a stage location. @@ -685,7 +374,7 @@ def info(self, stage_path: PathLike) -> StageObject: Returns ------- - StageObject + FilesObject """ res = self._manager._get( @@ -693,7 +382,7 @@ def info(self, stage_path: PathLike) -> StageObject: params=dict(metadata=1), ).json() - return StageObject.from_dict(res, self) + return FilesObject.from_dict(res, self) def exists(self, stage_path: PathLike) -> bool: """ @@ -953,6 +642,9 @@ def __repr__(self) -> str: return str(self) +StageObject = FilesObject # alias for backward compatibility + + class Workspace(object): """ SingleStoreDB workspace definition. From 4c181fcc4c95c19bc87e97e66b6307712519608c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20Gon=C3=A7alves?= Date: Mon, 25 Nov 2024 12:16:10 +0000 Subject: [PATCH 2/4] feat(files): add support for Files API (personal and shared spaces) --- singlestoredb/__init__.py | 2 +- singlestoredb/management/__init__.py | 1 + singlestoredb/management/files.py | 583 ++++++++++++++++++++++++++- 3 files changed, 584 insertions(+), 2 deletions(-) diff --git a/singlestoredb/__init__.py b/singlestoredb/__init__.py index 5700d9729..155ac24b3 100644 --- a/singlestoredb/__init__.py +++ b/singlestoredb/__init__.py @@ -25,7 +25,7 @@ DataError, ManagementError, ) from .management import ( - manage_cluster, manage_workspaces, + manage_cluster, manage_workspaces, manage_files, ) from .types import ( Date, Time, Timestamp, DateFromTicks, TimeFromTicks, TimestampFromTicks, diff --git a/singlestoredb/management/__init__.py b/singlestoredb/management/__init__.py index 3a4deeb68..0f4887fcb 100644 --- a/singlestoredb/management/__init__.py +++ b/singlestoredb/management/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python from .cluster import manage_cluster +from .files import manage_files from .manager import get_token from .workspace import get_organization from .workspace import get_secret diff --git a/singlestoredb/management/files.py b/singlestoredb/management/files.py index 1f60592a3..48ea7e5a2 100644 --- a/singlestoredb/management/files.py +++ b/singlestoredb/management/files.py @@ -16,12 +16,18 @@ from typing import TextIO from typing import Union +from .. import config from ..exceptions import ManagementError +from .manager import Manager from .utils import PathLike from .utils import to_datetime from .utils import vars_to_str +PERSONAL_SPACE = 'personal' +SHARED_SPACE = 'shared' + + class FilesObject(object): """ File / folder object. @@ -29,7 +35,8 @@ class FilesObject(object): It can belong to either a workspace stage or personal/shared space. This object is not instantiated directly. It is used in the results - of various operations in ``WorkspaceGroup.stage`` methods. + of various operations in ``WorkspaceGroup.stage``, ``FilesManager.personal_space`` + and ``FilesManager.shared_space`` methods. """ @@ -463,3 +470,577 @@ def __str__(self) -> str: @abstractmethod def __repr__(self) -> str: pass + + +class FilesManager(Manager): + """ + SingleStoreDB files manager. + + This class should be instantiated using :func:`singlestoredb.manage_files`. + + Parameters + ---------- + access_token : str, optional + The API key or other access token for the files management API + version : str, optional + Version of the API to use + base_url : str, optional + Base URL of the files management API + + See Also + -------- + :func:`singlestoredb.manage_files` + + """ + + #: Management API version if none is specified. + default_version = config.get_option('management.version') + + #: Base URL if none is specified. + default_base_url = config.get_option('management.base_url') + + #: Object type + obj_type = 'file' + + @property + def personal_space(self) -> FileSpace: + """Return the personal file space.""" + return FileSpace(PERSONAL_SPACE, self) + + @property + def shared_space(self) -> FileSpace: + """Return the shared file space.""" + return FileSpace(SHARED_SPACE, self) + + +def manage_files( + access_token: Optional[str] = None, + version: Optional[str] = None, + base_url: Optional[str] = None, + *, + organization_id: Optional[str] = None, +) -> FilesManager: + """ + Retrieve a SingleStoreDB files manager. + + Parameters + ---------- + access_token : str, optional + The API key or other access token for the files management API + version : str, optional + Version of the API to use + base_url : str, optional + Base URL of the files management API + organization_id : str, optional + ID of organization, if using a JWT for authentication + + Returns + ------- + :class:`FilesManager` + + """ + return FilesManager( + access_token=access_token, base_url=base_url, + version=version, organization_id=organization_id, + ) + + +class FileSpace(FileLocation): + """ + FileSpace manager. + + This object is not instantiated directly. + It is returned by ``FilesManager.personal_space`` or ``FilesManager.shared_space``. + + """ + + def __init__(self, location: str, manager: FilesManager): + self._location = location + self._manager = manager + + def open( + self, + path: PathLike, + mode: str = 'r', + encoding: Optional[str] = None, + ) -> Union[io.StringIO, io.BytesIO]: + """ + Open a file path for reading or writing. + + Parameters + ---------- + path : Path or str + The file path to read / write + mode : str, optional + The read / write mode. The following modes are supported: + * 'r' open for reading (default) + * 'w' open for writing, truncating the file first + * 'x' create a new file and open it for writing + The data type can be specified by adding one of the following: + * 'b' binary mode + * 't' text mode (default) + encoding : str, optional + The string encoding to use for text + + Returns + ------- + FilesObjectBytesReader - 'rb' or 'b' mode + FilesObjectBytesWriter - 'wb' or 'xb' mode + FilesObjectTextReader - 'r' or 'rt' mode + FilesObjectTextWriter - 'w', 'x', 'wt' or 'xt' mode + + """ + if '+' in mode or 'a' in mode: + raise ManagementError(msg='modifying an existing file is not supported') + + if 'w' in mode or 'x' in mode: + exists = self.exists(path) + if exists: + if 'x' in mode: + raise FileExistsError(f'file path already exists: {path}') + self.remove(path) + if 'b' in mode: + return FilesObjectBytesWriter(b'', self, path) + return FilesObjectTextWriter('', self, path) + + if 'r' in mode: + content = self.download_file(path) + if isinstance(content, bytes): + if 'b' in mode: + return FilesObjectBytesReader(content) + encoding = 'utf-8' if encoding is None else encoding + return FilesObjectTextReader(content.decode(encoding)) + + if isinstance(content, str): + return FilesObjectTextReader(content) + + raise ValueError(f'unrecognized file content type: {type(content)}') + + raise ValueError(f'must have one of create/read/write mode specified: {mode}') + + def upload_file( + self, + local_path: Union[PathLike, TextIO, BinaryIO], + path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + """ + Upload a local file. + + Parameters + ---------- + local_path : Path or str or file-like + Path to the local file or an open file object + path : Path or str + Path to the file + overwrite : bool, optional + Should the ``path`` be overwritten if it exists already? + + """ + if isinstance(local_path, (TextIO, BinaryIO)): + pass + elif not os.path.isfile(local_path): + raise IsADirectoryError(f'local path is not a file: {local_path}') + + if self.exists(path): + if not overwrite: + raise OSError(f'file path already exists: {path}') + + self.remove(path) + + if isinstance(local_path, (TextIO, BinaryIO)): + return self._upload(local_path, path, overwrite=overwrite) + return self._upload(open(local_path, 'rb'), path, overwrite=overwrite) + + # TODO: remove from FileLocation? + def upload_folder( + self, + local_path: PathLike, + path: PathLike, + *, + overwrite: bool = False, + recursive: bool = True, + include_root: bool = False, + ignore: Optional[Union[PathLike, List[PathLike]]] = None, + ) -> FilesObject: + """ + Upload a folder recursively. + + Only the contents of the folder are uploaded. To include the + folder name itself in the target path use ``include_root=True``. + + Parameters + ---------- + local_path : Path or str + Local directory to upload + path : Path or str + Path of folder to upload to + overwrite : bool, optional + If a file already exists, should it be overwritten? + recursive : bool, optional + Should nested folders be uploaded? + include_root : bool, optional + Should the local root folder itself be uploaded as the top folder? + ignore : Path or str or List[Path] or List[str], optional + Glob patterns of files to ignore, for example, '**/*.pyc` will + ignore all '*.pyc' files in the directory tree + + """ + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + def _upload( + self, + content: Union[str, bytes, TextIO, BinaryIO], + path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + """ + Upload content to a file. + + Parameters + ---------- + content : str or bytes or file-like + Content to upload + path : Path or str + Path to the file + overwrite : bool, optional + Should the ``path`` be overwritten if it exists already? + + """ + if self.exists(path): + if not overwrite: + raise OSError(f'file path already exists: {path}') + self.remove(path) + + self._manager._put( + f'files/fs/{self._location}/{path}', + files={'file': content}, + headers={'Content-Type': None}, + ) + + return self.info(path) + + # TODO: remove from FileLocation? + def mkdir(self, path: PathLike, overwrite: bool = False) -> FilesObject: + """ + Make a directory in the file space. + + Parameters + ---------- + path : Path or str + Path of the folder to create + overwrite : bool, optional + Should the file path be overwritten if it exists already? + + Returns + ------- + FilesObject + + """ + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + mkdirs = mkdir + + def rename( + self, + old_path: PathLike, + new_path: PathLike, + *, + overwrite: bool = False, + ) -> FilesObject: + """ + Move the file to a new location. + + Parameters + ----------- + old_path : Path or str + Original location of the path + new_path : Path or str + New location of the path + overwrite : bool, optional + Should the ``new_path`` be overwritten if it exists already? + + """ + if not self.exists(old_path): + raise OSError(f'file path does not exist: {old_path}') + + if str(old_path).endswith('/') or str(new_path).endswith('/'): + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + if self.exists(new_path): + if not overwrite: + raise OSError(f'file path already exists: {new_path}') + + self.remove(new_path) + + self._manager._patch( + f'files/fs/{self._location}/{old_path}', + json=dict(newPath=new_path), + ) + + return self.info(new_path) + + def info(self, path: PathLike) -> FilesObject: + """ + Return information about a file location. + + Parameters + ---------- + path : Path or str + Path to the file + + Returns + ------- + FilesObject + + """ + res = self._manager._get( + re.sub(r'/+$', r'/', f'files/fs/{self._location}/{path}'), + params=dict(metadata=1), + ).json() + + return FilesObject.from_dict(res, self) + + def exists(self, path: PathLike) -> bool: + """ + Does the given file path exist? + + Parameters + ---------- + path : Path or str + Path to file object + + Returns + ------- + bool + + """ + try: + self.info(path) + return True + except ManagementError as exc: + if exc.errno == 404: + return False + raise + + # TODO: remove from FileLocation? + def is_dir(self, path: PathLike) -> bool: + """ + Is the given file path a directory? + + Parameters + ---------- + path : Path or str + Path to file object + + Returns + ------- + bool + + """ + try: + return self.info(path).type == 'directory' + except ManagementError as exc: + if exc.errno == 404: + return False + raise + + # TODO: remove from FileLocation? + def is_file(self, path: PathLike) -> bool: + """ + Is the given file path a file? + + Parameters + ---------- + path : Path or str + Path to file object + + Returns + ------- + bool + + """ + try: + return self.info(path).type != 'directory' + except ManagementError as exc: + if exc.errno == 404: + return False + raise + + def _list_root_dir(self) -> List[str]: + """ + Return the names of files in the root directory. + Parameters + ---------- + """ + res = self._manager._get( + f'files/fs/{self._location}', + ).json() + return [x['path'] for x in res['content'] or []] + + # TODO: remove from FileLocation? + def listdir( + self, + path: PathLike = '/', + *, + recursive: bool = False, + ) -> List[str]: + """ + List the files / folders at the given path. + + Parameters + ---------- + path : Path or str, optional + Path to the file location + + Returns + ------- + List[str] + + """ + if path == '' or path == '/': + return self._list_root_dir() + + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + def download_file( + self, + path: PathLike, + local_path: Optional[PathLike] = None, + *, + overwrite: bool = False, + encoding: Optional[str] = None, + ) -> Optional[Union[bytes, str]]: + """ + Download the content of a file path. + + Parameters + ---------- + path : Path or str + Path to the file + local_path : Path or str + Path to local file target location + overwrite : bool, optional + Should an existing file be overwritten if it exists? + encoding : str, optional + Encoding used to convert the resulting data + + Returns + ------- + bytes or str - ``local_path`` is None + None - ``local_path`` is a Path or str + + """ + if local_path is not None and not overwrite and os.path.exists(local_path): + raise OSError('target file already exists; use overwrite=True to replace') + if self.is_dir(path): + raise IsADirectoryError(f'file path is a directory: {path}') + + out = self._manager._get( + f'files/fs/{self._location}/{path}', + ).content + + if local_path is not None: + with open(local_path, 'wb') as outfile: + outfile.write(out) + return None + + if encoding: + return out.decode(encoding) + + return out + + # TODO: remove from FileLocation? + def download_folder( + self, + path: PathLike, + local_path: PathLike = '.', + *, + overwrite: bool = False, + ) -> None: + """ + Download a FileSpace folder to a local directory. + + Parameters + ---------- + path : Path or str + Path to the file + local_path : Path or str + Path to local directory target location + overwrite : bool, optional + Should an existing directory / files be overwritten if they exist? + + """ + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + def remove(self, path: PathLike) -> None: + """ + Delete a file location. + + Parameters + ---------- + path : Path or str + Path to the location + + """ + if self.is_dir(path): + raise IsADirectoryError('file path is a directory') + + self._manager._delete(f'files/fs/{self._location}/{path}') + + # TODO: remove from FileLocation? + def removedirs(self, path: PathLike) -> None: + """ + Delete a folder recursively. + + Parameters + ---------- + path : Path or str + Path to the file location + + """ + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + # TODO: remove from FileLocation? + def rmdir(self, path: PathLike) -> None: + """ + Delete a folder. + + Parameters + ---------- + path : Path or str + Path to the file location + + """ + raise ManagementError( + msg='Operation not supported: directories are currently not allowed ' + 'in Files API', + ) + + def __str__(self) -> str: + """Return string representation.""" + return vars_to_str(self) + + def __repr__(self) -> str: + """Return string representation.""" + return str(self) From fc5bbaace95e9996d3aa17191efdf26cb7c66ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20Gon=C3=A7alves?= Date: Mon, 2 Dec 2024 11:57:35 +0000 Subject: [PATCH 3/4] feat(files): add unit tests --- singlestoredb/tests/test.ipynb | 18 ++ singlestoredb/tests/test2.ipynb | 18 ++ singlestoredb/tests/test_management.py | 274 ++++++++++++++++++++++++- 3 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 singlestoredb/tests/test.ipynb create mode 100644 singlestoredb/tests/test2.ipynb diff --git a/singlestoredb/tests/test.ipynb b/singlestoredb/tests/test.ipynb new file mode 100644 index 000000000..5f664f402 --- /dev/null +++ b/singlestoredb/tests/test.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test Notebook" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/singlestoredb/tests/test2.ipynb b/singlestoredb/tests/test2.ipynb new file mode 100644 index 000000000..4991bc6bc --- /dev/null +++ b/singlestoredb/tests/test2.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test Notebook 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/singlestoredb/tests/test_management.py b/singlestoredb/tests/test_management.py index 6d6a9b0e9..28006148c 100755 --- a/singlestoredb/tests/test_management.py +++ b/singlestoredb/tests/test_management.py @@ -740,7 +740,7 @@ def test_os_rename(self): 'rename_test_2/nest_1/nested_rename_test_3.sql', overwrite=True, ) - def test_stage_object(self): + def test_file_object(self): st = self.wg.stage st.mkdir('obj_test') @@ -1028,3 +1028,275 @@ def test_job_with_database_target(self): assert deleted job = job_manager.get(job.job_id) assert job.terminated_at is not None + + +@pytest.mark.management +class TestFileSpaces(unittest.TestCase): + + manager = None + personal_space = None + shared_space = None + + @classmethod + def setUpClass(cls): + cls.manager = s2.manage_files() + cls.personal_space = cls.manager.personal_space + cls.shared_space = cls.manager.shared_space + + @classmethod + def tearDownClass(cls): + cls.manager = None + cls.personal_space = None + cls.shared_space = None + + def test_upload_file(self): + for space in [self.personal_space, self.shared_space]: + root = space.info('/') + assert str(root.path) == '/' + assert root.type == 'directory' + + # Upload files + f = space.upload_file( + TEST_DIR / 'test.ipynb', + 'upload_test.ipynb', + ) + assert str(f.path) == 'upload_test.ipynb' + assert f.type == 'notebook' + + # Download and compare to original + txt = f.download(encoding='utf-8') + assert txt == open(TEST_DIR / 'test.ipynb').read() + + # Make sure we can't overwrite + with self.assertRaises(OSError): + space.upload_file( + TEST_DIR / 'test.ipynb', + 'upload_test.ipynb', + ) + + # Force overwrite with new content + f = space.upload_file( + TEST_DIR / 'test2.ipynb', + 'upload_test.ipynb', overwrite=True, + ) + assert str(f.path) == 'upload_test.ipynb' + assert f.type == 'notebook' + + # Verify new content + txt = f.download(encoding='utf-8') + assert txt == open(TEST_DIR / 'test2.ipynb').read() + + # Make sure we can't upload a folder + with self.assertRaises(s2.ManagementError): + space.upload_folder(TEST_DIR, 'test') + + # Cleanup + space.remove('upload_test.ipynb') + + def test_open(self): + for space in [self.personal_space, self.shared_space]: + # See if error is raised for non-existent file + with self.assertRaises(s2.ManagementError): + space.open('open_test.ipynb', 'r') + + # Load test file + space.upload_file(TEST_DIR / 'test.ipynb', 'open_test.ipynb') + + # Read file using `open` + with space.open('open_test.ipynb', 'r') as rfile: + assert rfile.read() == open(TEST_DIR / 'test.ipynb').read() + + # Read file using `open` with 'rt' mode + with space.open('open_test.ipynb', 'rt') as rfile: + assert rfile.read() == open(TEST_DIR / 'test.ipynb').read() + + # Read file using `open` with 'rb' mode + with space.open('open_test.ipynb', 'rb') as rfile: + assert rfile.read() == open(TEST_DIR / 'test.ipynb', 'rb').read() + + # Read file using `open` with 'rb' mode + with self.assertRaises(ValueError): + with space.open('open_test.ipynb', 'b') as rfile: + pass + + # Attempt overwrite file using `open` with mode 'x' + with self.assertRaises(OSError): + with space.open('open_test.ipynb', 'x') as wfile: + pass + + # Attempt overwrite file using `open` with mode 'w' + with space.open('open_test.ipynb', 'w') as wfile: + wfile.write(open(TEST_DIR / 'test2.ipynb').read()) + + txt = space.download_file('open_test.ipynb', encoding='utf-8') + + assert txt == open(TEST_DIR / 'test2.ipynb').read() + + # Test writer without context manager + wfile = space.open('open_raw_test.ipynb', 'w') + for line in open(TEST_DIR / 'test.ipynb'): + wfile.write(line) + wfile.close() + + txt = space.download_file( + 'open_raw_test.ipynb', + encoding='utf-8', + ) + + assert txt == open(TEST_DIR / 'test.ipynb').read() + + # Test reader without context manager + rfile = space.open('open_raw_test.ipynb', 'r') + txt = '' + for line in rfile: + txt += line + rfile.close() + + assert txt == open(TEST_DIR / 'test.ipynb').read() + + # Cleanup + space.remove('open_test.ipynb') + space.remove('open_raw_test.ipynb') + + def test_obj_open(self): + for space in [self.personal_space, self.shared_space]: + # Load test file + f = space.upload_file( + TEST_DIR / 'test.ipynb', + 'obj_open_test.ipynb', + ) + + # Read file using `open` + with f.open() as rfile: + assert rfile.read() == open(TEST_DIR / 'test.ipynb').read() + + # Make sure directories error out + with self.assertRaises(s2.ManagementError): + space.mkdir('obj_open_dir') + + # Write file using `open` + with f.open('w', encoding='utf-8') as wfile: + wfile.write(open(TEST_DIR / 'test2.ipynb').read()) + + assert f.download(encoding='utf-8') == open(TEST_DIR / 'test2.ipynb').read() + + # Test writer without context manager + wfile = f.open('w') + for line in open(TEST_DIR / 'test.ipynb'): + wfile.write(line) + wfile.close() + + txt = space.download_file(f.path, encoding='utf-8') + + assert txt == open(TEST_DIR / 'test.ipynb').read() + + # Test reader without context manager + rfile = f.open('r') + txt = '' + for line in rfile: + txt += line + rfile.close() + + assert txt == open(TEST_DIR / 'test.ipynb').read() + + # Cleanup + space.remove('obj_open_test.ipynb') + + def test_os_directories(self): + for space in [self.personal_space, self.shared_space]: + # Make sure directories error out + with self.assertRaises(s2.ManagementError): + space.mkdir('mkdir_test_1') + + with self.assertRaises(s2.ManagementError): + space.exists('mkdir_test_1/') + + out = space.listdir('/') + assert 'mkdir_test_1/' not in out + + with self.assertRaises(s2.ManagementError): + space.rmdir('mkdir_test_1/') + + def test_os_rename(self): + for space in [self.personal_space, self.shared_space]: + space.upload_file( + TEST_DIR / 'test.ipynb', + 'rename_test.ipynb', + ) + assert 'rename_test.ipynb' in space.listdir('/') + assert 'rename_test_2.ipynb' not in space.listdir('/') + + space.rename( + 'rename_test.ipynb', + 'rename_test_2.ipynb', + ) + assert 'rename_test.ipynb' not in space.listdir('/') + assert 'rename_test_2.ipynb' in space.listdir('/') + + # non-existent file + with self.assertRaises(OSError): + space.rename('rename_foo.ipynb', 'rename_foo_2.ipynb') + + space.upload_file( + TEST_DIR / 'test.ipynb', + 'rename_test_3.ipynb', + ) + + # overwrite + with self.assertRaises(OSError): + space.rename( + 'rename_test_2.ipynb', + 'rename_test_3.ipynb', + ) + + space.rename( + 'rename_test_2.ipynb', + 'rename_test_3.ipynb', overwrite=True, + ) + + # Cleanup + space.remove('rename_test_3.ipynb') + + def test_file_object(self): + for space in [self.personal_space, self.shared_space]: + f = space.upload_file( + TEST_DIR / 'test.ipynb', + 'obj_test.ipynb', + ) + + assert not f.is_dir() + assert f.is_file() + + # abspath / basename / dirname / exists + assert f.abspath() == 'obj_test.ipynb' + assert f.basename() == 'obj_test.ipynb' + assert f.dirname() == '/' + assert f.exists() + + # download + assert f.download(encoding='utf-8') == \ + open(TEST_DIR / 'test.ipynb', 'r').read() + assert f.download() == open(TEST_DIR / 'test.ipynb', 'rb').read() + + assert space.is_file('obj_test.ipynb') + f.remove() + assert not space.is_file('obj_test.ipynb') + + # mtime / ctime + assert f.getmtime() > 0 + assert f.getctime() > 0 + + # rename + f = space.upload_file( + TEST_DIR / 'test.ipynb', + 'obj_test.ipynb', + ) + assert space.exists('obj_test.ipynb') + assert not space.exists('obj_test_2.ipynb') + f.rename('obj_test_2.ipynb') + assert not space.exists('obj_test.ipynb') + assert space.exists('obj_test_2.ipynb') + assert f.abspath() == 'obj_test_2.ipynb' + + # Cleanup + space.remove('obj_test_2.ipynb') From 180e423350abd9ea85938faca9847ff1357b6456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20Gon=C3=A7alves?= Date: Mon, 25 Nov 2024 15:41:11 +0000 Subject: [PATCH 4/4] feat(fusion-sql): add support for Files API (personal and shared spaces) --- singlestoredb/fusion/handler.py | 3 + singlestoredb/fusion/handlers/files.py | 690 +++++++++++++++++++++++++ singlestoredb/fusion/handlers/utils.py | 38 ++ singlestoredb/management/files.py | 8 - 4 files changed, 731 insertions(+), 8 deletions(-) create mode 100644 singlestoredb/fusion/handlers/files.py diff --git a/singlestoredb/fusion/handler.py b/singlestoredb/fusion/handler.py index 1fcde40d4..5eac9bcc4 100644 --- a/singlestoredb/fusion/handler.py +++ b/singlestoredb/fusion/handler.py @@ -74,6 +74,9 @@ '': '', '': '', '': '', + '': r''' + file_type = { FILE | FOLDER } + ''', } BUILTIN_DEFAULTS = { # type: ignore diff --git a/singlestoredb/fusion/handlers/files.py b/singlestoredb/fusion/handlers/files.py new file mode 100644 index 000000000..727eb8dde --- /dev/null +++ b/singlestoredb/fusion/handlers/files.py @@ -0,0 +1,690 @@ +#!/usr/bin/env python3 +from typing import Any +from typing import Dict +from typing import Optional + +from .. import result +from ..handler import SQLHandler +from ..result import FusionSQLResult +from .utils import dt_isoformat +from .utils import get_file_space + + +class ShowFilesHandler(SQLHandler): + """ + Generic handler for listing files in a personal/shared space. + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + file_space = get_file_space(params) + + res = FusionSQLResult() + res.add_field('Name', result.STRING) + + if params['extended']: + res.add_field('Type', result.STRING) + res.add_field('Size', result.INTEGER) + res.add_field('Writable', result.STRING) + res.add_field('CreatedAt', result.DATETIME) + res.add_field('LastModifiedAt', result.DATETIME) + + files = [] + for x in file_space.listdir( + params['at_path'] or '/', + recursive=params['recursive'], + ): + info = file_space.info(x) + files.append( + tuple([ + x, info.type, info.size or 0, info.writable, + dt_isoformat(info.created_at), + dt_isoformat(info.last_modified_at), + ]), + ) + res.set_rows(files) + + else: + res.set_rows([(x,) for x in file_space.listdir( + params['at_path'] or '/', + recursive=params['recursive'], + )]) + + if params['like']: + res = res.like(Name=params['like']) + + return res.order_by(**params['order_by']).limit(params['limit']) + + +class ShowPersonalFilesHandler(ShowFilesHandler): + """ + SHOW PERSONAL FILES + [ at_path ] [ ] + [ ] + [ ] [ recursive ] [ extended ]; + + # File path to list + at_path = AT '' + + # Should the listing be recursive? + recursive = RECURSIVE + + # Should extended attributes be shown? + extended = EXTENDED + + Description + ----------- + Displays a list of files in a personal/shared space. + + Arguments + --------- + * ````: A path in the personal/shared space. + * ````: A pattern similar to SQL LIKE clause. + Uses ``%`` as the wildcard character. + + Remarks + ------- + * Use the ``LIKE`` clause to specify a pattern and return only the + files that match the specified pattern. + * The ``LIMIT`` clause limits the number of results to the + specified number. + * Use the ``ORDER BY`` clause to sort the results by the specified + key. By default, the results are sorted in the ascending order. + * The ``AT PATH`` clause specifies the path in the personal/shared + space to list the files from. + * Use the ``RECURSIVE`` clause to list the files recursively. + * To return more information about the files, use the ``EXTENDED`` + clause. + + Examples + -------- + The following command lists the files at a specific path:: + + SHOW PERSONAL FILES AT PATH "/data/"; + + The following command lists the files recursively with + additional information:: + + SHOW PERSONAL FILES RECURSIVE EXTENDED; + + See Also + -------- + * ``SHOW SHARED FILES`` + * ``UPLOAD PERSONAL FILE`` + * ``UPLOAD SHARED FILE`` + * ``DOWNLOAD PERSONAL FILE`` + * ``DOWNLOAD SHARED FILE`` + + """ # noqa: E501 + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'PERSONAL' + return super().run(params) + + +class ShowSharedFilesHandler(ShowFilesHandler): + """ + SHOW SHARED FILES + [ at_path ] [ ] + [ ] + [ ] [ recursive ] [ extended ]; + + # File path to list + at_path = AT '' + + # Should the listing be recursive? + recursive = RECURSIVE + + # Should extended attributes be shown? + extended = EXTENDED + + Description + ----------- + Displays a list of files in a personal/shared space. + + Arguments + --------- + * ````: A path in the personal/shared space. + * ````: A pattern similar to SQL LIKE clause. + Uses ``%`` as the wildcard character. + + Remarks + ------- + * Use the ``LIKE`` clause to specify a pattern and return only the + files that match the specified pattern. + * The ``LIMIT`` clause limits the number of results to the + specified number. + * Use the ``ORDER BY`` clause to sort the results by the specified + key. By default, the results are sorted in the ascending order. + * The ``AT PATH`` clause specifies the path in the personal/shared + space to list the files from. + * Use the ``RECURSIVE`` clause to list the files recursively. + * To return more information about the files, use the ``EXTENDED`` + clause. + + Examples + -------- + The following command lists the files at a specific path:: + + SHOW SHARED FILES AT PATH "/data/"; + + The following command lists the files recursively with + additional information:: + + SHOW SHARED FILES RECURSIVE EXTENDED; + + See Also + -------- + * ``SHOW PERSONAL FILES`` + * ``UPLOAD PERSONAL FILE`` + * ``UPLOAD SHARED FILE`` + * ``DOWNLOAD PERSONAL FILE`` + * ``DOWNLOAD SHARED FILE`` + + """ # noqa: E501 + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'SHARED' + return super().run(params) + + +ShowPersonalFilesHandler.register(overwrite=True) +ShowSharedFilesHandler.register(overwrite=True) + + +class UploadFileHandler(SQLHandler): + """ + Generic handler for uploading files to a personal/shared space. + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + file_space = get_file_space(params) + file_space.upload_file( + params['local_path'], params['path'], + overwrite=params['overwrite'], + ) + return None + + +class UploadPersonalFileHandler(UploadFileHandler): + """ + UPLOAD PERSONAL FILE TO path + FROM local_path [ overwrite ]; + + # Path to file + path = '' + + # Path to local file + local_path = '' + + # Should an existing file be overwritten? + overwrite = OVERWRITE + + Description + ----------- + Uploads a file to a personal/shared space. + + Arguments + --------- + * ````: The path in the personal/shared space where the file is uploaded. + * ````: The path to the file to upload in the local + directory. + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing file at the + specified path in the personal/shared space is overwritten. + + Examples + -------- + The following command uploads a file to a personal/shared space and overwrite any + existing files at the specified path:: + + UPLOAD PERSONAL FILE TO '/data/stats.csv' + FROM '/tmp/user/stats.csv' OVERWRITE; + + See Also + -------- + * ``UPLOAD SHARED FILE`` + * ``DOWNLOAD PERSONAL FILE`` + * ``DOWNLOAD SHARED FILE`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'PERSONAL' + return super().run(params) + + +class UploadSharedFileHandler(UploadFileHandler): + """ + UPLOAD SHARED FILE TO path + FROM local_path [ overwrite ]; + + # Path to file + path = '' + + # Path to local file + local_path = '' + + # Should an existing file be overwritten? + overwrite = OVERWRITE + + Description + ----------- + Uploads a file to a personal/shared space. + + Arguments + --------- + * ````: The path in the personal/shared space where the file is uploaded. + * ````: The path to the file to upload in the local + directory. + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing file at the + specified path in the personal/shared space is overwritten. + + Examples + -------- + The following command uploads a file to a personal/shared space and overwrite any + existing files at the specified path:: + + UPLOAD SHARED FILE TO '/data/stats.csv' + FROM '/tmp/user/stats.csv' OVERWRITE; + + See Also + -------- + * ``UPLOAD PERSONAL FILE`` + * ``DOWNLOAD PERSONAL FILE`` + * ``DOWNLOAD SHARED FILE`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'SHARED' + return super().run(params) + + +UploadPersonalFileHandler.register(overwrite=True) +UploadSharedFileHandler.register(overwrite=True) + + +class DownloadFileHandler(SQLHandler): + """ + Generic handler for downloading files from a personal/shared space. + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + file_space = get_file_space(params) + + out = file_space.download_file( + params['path'], + local_path=params['local_path'] or None, + overwrite=params['overwrite'], + encoding=params['encoding'] or None, + ) + + if not params['local_path']: + res = FusionSQLResult() + if params['encoding']: + res.add_field('Data', result.STRING) + else: + res.add_field('Data', result.BLOB) + res.set_rows([(out,)]) + return res + + return None + + +class DownloadPersonalFileHandler(DownloadFileHandler): + """ + DOWNLOAD PERSONAL FILE path + [ local_path ] + [ overwrite ] + [ encoding ]; + + # Path to file + path = '' + + # Path to local file + local_path = TO '' + + # Should an existing file be overwritten? + overwrite = OVERWRITE + + # File encoding + encoding = ENCODING '' + + Description + ----------- + Download a file from a personal/shared space. + + Arguments + --------- + * ````: The path to the file to download in a personal/shared space. + * ````: The encoding to apply to the downloaded file. + * ````: Specifies the path in the local directory + where the file is downloaded. + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing file at + the download location is overwritten. + * By default, files are downloaded in binary encoding. To view + the contents of the file on the standard output, use the + ``ENCODING`` clause and specify an encoding. + * If ```` is not specified, the file is displayed + on the standard output. + + Examples + -------- + The following command displays the contents of the file on the + standard output:: + + DOWNLOAD PERSONAL FILE '/data/stats.csv' ENCODING 'utf8'; + + The following command downloads a file to a specific location and + overwrites any existing file with the name ``stats.csv`` on the local storage:: + + DOWNLOAD PERSONAL FILE '/data/stats.csv' + TO '/tmp/data.csv' OVERWRITE; + + See Also + -------- + * ``DOWNLOAD SHARED FILE`` + * ``UPLOAD PERSONAL FILE`` + * ``UPLOAD SHARED FILE`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'PERSONAL' + return super().run(params) + + +class DownloadSharedFileHandler(DownloadFileHandler): + """ + DOWNLOAD SHARED FILE path + [ local_path ] + [ overwrite ] + [ encoding ]; + + # Path to file + path = '' + + # Path to local file + local_path = TO '' + + # Should an existing file be overwritten? + overwrite = OVERWRITE + + # File encoding + encoding = ENCODING '' + + Description + ----------- + Download a file from a personal/shared space. + + Arguments + --------- + * ````: The path to the file to download in a personal/shared space. + * ````: The encoding to apply to the downloaded file. + * ````: Specifies the path in the local directory + where the file is downloaded. + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing file at + the download location is overwritten. + * By default, files are downloaded in binary encoding. To view + the contents of the file on the standard output, use the + ``ENCODING`` clause and specify an encoding. + * If ```` is not specified, the file is displayed + on the standard output. + + Examples + -------- + The following command displays the contents of the file on the + standard output:: + + DOWNLOAD SHARED FILE '/data/stats.csv' ENCODING 'utf8'; + + The following command downloads a file to a specific location and + overwrites any existing file with the name ``stats.csv`` on the local storage:: + + DOWNLOAD SHARED FILE '/data/stats.csv' + TO '/tmp/data.csv' OVERWRITE; + + See Also + -------- + * ``DOWNLOAD PERSONAL FILE`` + * ``UPLOAD PERSONAL FILE`` + * ``UPLOAD SHARED FILE`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'SHARED' + return super().run(params) + + +DownloadPersonalFileHandler.register(overwrite=True) +DownloadSharedFileHandler.register(overwrite=True) + + +class DropHandler(SQLHandler): + """ + Generic handler for deleting files/folders from a personal/shared space. + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + file_space = get_file_space(params) + + file_type = params['file_type'] + if not file_type: + raise KeyError('file type was not specified') + + file_type = file_type.lower() + if file_type not in ['file', 'folder']: + raise ValueError('file type must be either FILE or FOLDER') + + if file_type == 'file': + file_space.remove(params['path']) + elif file_type == 'folder': + if params['recursive']: + file_space.removedirs(params['path']) + else: + file_space.rmdir(params['path']) + + return None + + +class DropPersonalHandler(DropHandler): + """ + DROP PERSONAL path + [ recursive ]; + + # Path to file + path = '' + + # Should folders be deleted recursively? + recursive = RECURSIVE + + Description + ----------- + Deletes a file/folder from a personal/shared space. + + Arguments + --------- + * ````: The type of the file, it can + be either 'FILE' or 'FOLDER'. + * ````: The path to the file to delete in a personal/shared space. + + Remarks + ------- + * The ``RECURSIVE`` clause indicates that the specified folder + is deleted recursively. + + Example + -------- + The following commands delete a file/folder from a personal/shared space:: + + DROP PERSONAL FILE '/data/stats.csv'; + DROP PERSONAL FOLDER '/data/' RECURSIVE; + + See Also + -------- + * ``DROP SHARED FILE`` + * ``DROP SHARED FOLDER`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'PERSONAL' + return super().run(params) + + +class DropSharedHandler(DropHandler): + """ + DROP SHARED path + [ recursive ]; + + # Path to file + path = '' + + # Should folders be deleted recursively? + recursive = RECURSIVE + + Description + ----------- + Deletes a file/folder from a personal/shared space. + + Arguments + --------- + * ````: The type of the file, it can + be either 'FILE' or 'FOLDER'. + * ````: The path to the file to delete in a personal/shared space. + + Remarks + ------- + * The ``RECURSIVE`` clause indicates that the specified folder + is deleted recursively. + + Example + -------- + The following commands delete a file/folder from a personal/shared space:: + + DROP SHARED FILE '/data/stats.csv'; + DROP SHARED FOLDER '/data/' RECURSIVE; + + See Also + -------- + * ``DROP PERSONAL FILE`` + * ``DROP PERSONAL FOLDER`` + + """ # noqa: E501 + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'SHARED' + return super().run(params) + + +DropPersonalHandler.register(overwrite=True) +DropSharedHandler.register(overwrite=True) + + +class CreateFolderHandler(SQLHandler): + """ + Generic handler for creating folders in a personal/shared space. + """ + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + file_space = get_file_space(params) + file_space.mkdir(params['path'], overwrite=params['overwrite']) + return None + + +class CreatePersonalFolderHandler(CreateFolderHandler): + """ + CREATE PERSONAL FOLDER path + [ overwrite ]; + + # Path to folder + path = '' + + # Should an existing folder be overwritten? + overwrite = OVERWRITE + + Description + ----------- + Creates a new folder at the specified path in a personal/shared space. + + Arguments + --------- + * ````: The path in a personal/shared space where the folder + is created. The path must end with a trailing slash (/). + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing + folder at the specified path is overwritten. + + Example + ------- + The following command creates a folder in a personal/shared space:: + + CREATE PERSONAL FOLDER `/data/csv/`; + + See Also + -------- + * ``CREATE SHARED FOLDER`` + + """ + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'PERSONAL' + return super().run(params) + + +class CreateSharedFolderHandler(CreateFolderHandler): + """ + CREATE SHARED FOLDER path + [ overwrite ]; + + # Path to folder + path = '' + + # Should an existing folder be overwritten? + overwrite = OVERWRITE + + Description + ----------- + Creates a new folder at the specified path in a personal/shared space. + + Arguments + --------- + * ````: The path in a personal/shared space where the folder + is created. The path must end with a trailing slash (/). + + Remarks + ------- + * If the ``OVERWRITE`` clause is specified, any existing + folder at the specified path is overwritten. + + Example + ------- + The following command creates a folder in a personal/shared space:: + + CREATE SHARED FOLDER `/data/csv/`; + + See Also + -------- + * ``CREATE PERSONAL FOLDER`` + + """ + + def run(self, params: Dict[str, Any]) -> Optional[FusionSQLResult]: + params['file_location'] = 'SHARED' + return super().run(params) + + +CreatePersonalFolderHandler.register(overwrite=True) +CreateSharedFolderHandler.register(overwrite=True) diff --git a/singlestoredb/fusion/handlers/utils.py b/singlestoredb/fusion/handlers/utils.py index 27ffc1775..a3df6a740 100644 --- a/singlestoredb/fusion/handlers/utils.py +++ b/singlestoredb/fusion/handlers/utils.py @@ -8,6 +8,11 @@ from ...exceptions import ManagementError from ...management import manage_workspaces +from ...management.files import FilesManager +from ...management.files import FileSpace +from ...management.files import manage_files +from ...management.files import PERSONAL_SPACE +from ...management.files import SHARED_SPACE from ...management.workspace import StarterWorkspace from ...management.workspace import Workspace from ...management.workspace import WorkspaceGroup @@ -19,6 +24,11 @@ def get_workspace_manager() -> WorkspaceManager: return manage_workspaces() +def get_files_manager() -> FilesManager: + """Return a new files manager.""" + return manage_files() + + def dt_isoformat(dt: Optional[datetime.datetime]) -> Optional[str]: """Convert datetime to string.""" if dt is None: @@ -270,3 +280,31 @@ def get_deployment( raise raise KeyError('no deployment was specified') + + +def get_file_space(params: Dict[str, Any]) -> FileSpace: + """ + Retrieve the specified file space. + + This function will get a file space from the + following parameters: + + * params['file_location'] + """ + manager = get_files_manager() + + file_location = params.get('file_location') + if file_location: + file_location_lower_case = file_location.lower() + if ( + file_location_lower_case != PERSONAL_SPACE and + file_location_lower_case != SHARED_SPACE + ): + raise ValueError(f'invalid file location: {file_location}') + + if file_location_lower_case == PERSONAL_SPACE: + return manager.personal_space + elif file_location_lower_case == SHARED_SPACE: + return manager.shared_space + + raise KeyError('no file space was specified') diff --git a/singlestoredb/management/files.py b/singlestoredb/management/files.py index 48ea7e5a2..682e1e81e 100644 --- a/singlestoredb/management/files.py +++ b/singlestoredb/management/files.py @@ -653,7 +653,6 @@ def upload_file( return self._upload(local_path, path, overwrite=overwrite) return self._upload(open(local_path, 'rb'), path, overwrite=overwrite) - # TODO: remove from FileLocation? def upload_folder( self, local_path: PathLike, @@ -725,7 +724,6 @@ def _upload( return self.info(path) - # TODO: remove from FileLocation? def mkdir(self, path: PathLike, overwrite: bool = False) -> FilesObject: """ Make a directory in the file space. @@ -834,7 +832,6 @@ def exists(self, path: PathLike) -> bool: return False raise - # TODO: remove from FileLocation? def is_dir(self, path: PathLike) -> bool: """ Is the given file path a directory? @@ -856,7 +853,6 @@ def is_dir(self, path: PathLike) -> bool: return False raise - # TODO: remove from FileLocation? def is_file(self, path: PathLike) -> bool: """ Is the given file path a file? @@ -889,7 +885,6 @@ def _list_root_dir(self) -> List[str]: ).json() return [x['path'] for x in res['content'] or []] - # TODO: remove from FileLocation? def listdir( self, path: PathLike = '/', @@ -964,7 +959,6 @@ def download_file( return out - # TODO: remove from FileLocation? def download_folder( self, path: PathLike, @@ -1005,7 +999,6 @@ def remove(self, path: PathLike) -> None: self._manager._delete(f'files/fs/{self._location}/{path}') - # TODO: remove from FileLocation? def removedirs(self, path: PathLike) -> None: """ Delete a folder recursively. @@ -1021,7 +1014,6 @@ def removedirs(self, path: PathLike) -> None: 'in Files API', ) - # TODO: remove from FileLocation? def rmdir(self, path: PathLike) -> None: """ Delete a folder.