Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
* Extracted folder/bucket scanning into a new `scan` module

## [1.16.0] - 2022-04-27

This release contains a preview of replication support. It allows for basic
Expand Down
30 changes: 17 additions & 13 deletions b2sdk/_v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,7 @@
from b2sdk.sync.action import B2HideAction
from b2sdk.sync.action import B2UploadAction
from b2sdk.sync.action import LocalDeleteAction
from b2sdk.sync.exception import EnvironmentEncodingError
from b2sdk.sync.exception import IncompleteSync
from b2sdk.sync.exception import InvalidArgument
from b2sdk.sync.folder import AbstractFolder
from b2sdk.sync.folder import B2Folder
from b2sdk.sync.folder import LocalFolder
from b2sdk.sync.folder_parser import parse_sync_folder
from b2sdk.sync.path import AbstractSyncPath, B2SyncPath, LocalSyncPath
from b2sdk.sync.policy import AbstractFileSyncPolicy
from b2sdk.sync.policy import CompareVersionMode
from b2sdk.sync.policy import NewerFileSyncMode
Expand All @@ -189,19 +182,30 @@
from b2sdk.sync.policy_manager import POLICY_MANAGER
from b2sdk.sync.report import SyncFileReporter
from b2sdk.sync.report import SyncReport
from b2sdk.sync.scan_policies import DEFAULT_SCAN_MANAGER
from b2sdk.sync.scan_policies import IntegerRange
from b2sdk.sync.scan_policies import RegexSet
from b2sdk.sync.scan_policies import ScanPoliciesManager
from b2sdk.sync.scan_policies import convert_dir_regex_to_dir_prefix_regex
from b2sdk.sync.sync import KeepOrDeleteMode
from b2sdk.sync.sync import Synchronizer
from b2sdk.sync.sync import zip_folders
from b2sdk.sync.encryption_provider import AbstractSyncEncryptionSettingsProvider
from b2sdk.sync.encryption_provider import BasicSyncEncryptionSettingsProvider
from b2sdk.sync.encryption_provider import ServerDefaultSyncEncryptionSettingsProvider
from b2sdk.sync.encryption_provider import SERVER_DEFAULT_SYNC_ENCRYPTION_SETTINGS_PROVIDER

# scan

from b2sdk.scan.exception import EnvironmentEncodingError
from b2sdk.scan.exception import InvalidArgument
from b2sdk.scan.folder import AbstractFolder
from b2sdk.scan.folder import B2Folder
from b2sdk.scan.folder import LocalFolder
from b2sdk.scan.folder_parser import parse_folder

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that in v2 both parse_sync_folder and parse_folder should be available, and there should be a test that makes sure both of them are exported.

from b2sdk.scan.path import AbstractPath, B2Path, LocalPath
from b2sdk.scan.policies import convert_dir_regex_to_dir_prefix_regex
from b2sdk.scan.policies import DEFAULT_SCAN_MANAGER
from b2sdk.scan.policies import IntegerRange
from b2sdk.scan.policies import RegexSet
from b2sdk.scan.policies import ScanPoliciesManager
from b2sdk.scan.report import Report
from b2sdk.scan.scan import zip_folders

# replication

from b2sdk.replication.setting import ReplicationConfigurationFactory
Expand Down
16 changes: 8 additions & 8 deletions b2sdk/_v3/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@
from b2sdk.exception import SSECKeyError
from b2sdk.exception import WrongEncryptionModeForBucketDefault
from b2sdk.exception import interpret_b2_error
from b2sdk.sync.exception import EmptyDirectory
from b2sdk.sync.exception import EnvironmentEncodingError
from b2sdk.sync.exception import IncompleteSync
from b2sdk.sync.exception import InvalidArgument
from b2sdk.sync.exception import NotADirectory
from b2sdk.sync.exception import UnableToCreateDirectory
from b2sdk.sync.exception import UnSyncableFilename
from b2sdk.sync.exception import check_invalid_argument
from b2sdk.scan.exception import UnableToCreateDirectory
from b2sdk.scan.exception import EmptyDirectory
from b2sdk.scan.exception import EnvironmentEncodingError
from b2sdk.scan.exception import InvalidArgument
from b2sdk.scan.exception import NotADirectory
from b2sdk.scan.exception import UnsupportedFilename
from b2sdk.scan.exception import check_invalid_argument

__all__ = (
'AccessDenied',
Expand Down Expand Up @@ -144,7 +144,7 @@
'UnknownHost',
'UnrecognizedBucketType',
'UnableToCreateDirectory',
'UnSyncableFilename',
'UnsupportedFilename',
'UnsatisfiableRange',
'UnusableFileName',
'interpret_b2_error',
Expand Down
105 changes: 105 additions & 0 deletions b2sdk/scan/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
######################################################################
#
# File: b2sdk/scan/exception.py
#
# Copyright 2022 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

from contextlib import contextmanager
from typing import Iterator, Type

from ..exception import B2Error, B2SimpleError


class EnvironmentEncodingError(B2Error):
    """
    Signals that a file name could not be decoded using the system encoding.
    """

    def __init__(self, filename, encoding):
        """
        :param filename: the file name that failed to decode
        :type filename: str, bytes
        :param str encoding: name of the encoding that was used
        """
        super().__init__()
        self.encoding = encoding
        self.filename = filename

    def __str__(self):
        # The template is kept as one literal so the emitted text (including
        # its line breaks) stays exactly as users have always seen it.
        template = """file name %s cannot be decoded with system encoding (%s).
We think this is an environment error which you should workaround by
setting your system encoding properly, for example like this:
export LANG=en_US.UTF-8"""
        return template % (self.filename, self.encoding)


class InvalidArgument(B2Error):
    """
    Signals that at least one argument passed by the caller is not valid.
    """

    def __init__(self, parameter_name, message):
        """
        :param parameter_name: name of the offending function argument
        :param message: short description of what is wrong with it
        """
        super().__init__()
        self.message = message
        self.parameter_name = parameter_name

    def __str__(self):
        # Rendered as "<parameter name> <message>".
        parts = (self.parameter_name, self.message)
        return "%s %s" % parts


class UnsupportedFilename(B2Error):
    """
    Signals that the scan operation cannot handle a particular file name.
    """

    def __init__(self, message, filename):
        """
        :param message: short description of why the file name is rejected
        :param filename: the file name that was rejected
        """
        super().__init__()
        self.message = message
        self.filename = filename

    def __str__(self):
        # Rendered as "<message>: <file name>".
        parts = (self.message, self.filename)
        return "%s: %s" % parts


@contextmanager
def check_invalid_argument(parameter_name: str, message: str,
                           *exceptions: Type[Exception]) -> Iterator[None]:
    """
    Translate selected exceptions raised in the managed block into `InvalidArgument`.

    :param parameter_name: name of the argument being validated
    :param message: explanation to report; when empty, the text of the
                    caught exception is used instead
    :param exceptions: exception classes that should be translated
    :raises InvalidArgument: when one of ``exceptions`` is raised inside the
                             block; the caught exception is chained as its cause
    """
    try:
        yield
    except exceptions as caught:
        # Fall back to the caught exception's own text when no message was given.
        reason = message or str(caught)
        raise InvalidArgument(parameter_name, reason) from caught


class BaseDirectoryError(B2SimpleError):
    """
    Common base for directory-related errors; keeps the offending path.
    """

    def __init__(self, path):
        """
        :param path: the directory path this error refers to
        """
        # Store the path on the instance, then hand it to the parent
        # constructor as the sole argument.
        self.path = path
        super().__init__(path)


class EmptyDirectory(BaseDirectoryError):
    """Directory error subtype named for an empty directory; adds no behavior."""


class UnableToCreateDirectory(BaseDirectoryError):
    """Directory error subtype named for a failed directory creation; adds no behavior."""


class NotADirectory(BaseDirectoryError):
    """Directory error subtype named for a path that is not a directory; adds no behavior."""
52 changes: 26 additions & 26 deletions b2sdk/sync/folder.py → b2sdk/scan/folder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
######################################################################
#
# File: b2sdk/sync/folder.py
# File: b2sdk/scan/folder.py
#
# Copyright 2019 Backblaze Inc. All Rights Reserved.
#
Expand All @@ -13,13 +13,13 @@
import platform
import re
import sys

from abc import ABCMeta, abstractmethod
from .exception import EmptyDirectory, EnvironmentEncodingError, UnSyncableFilename, NotADirectory, UnableToCreateDirectory
from .path import B2SyncPath, LocalSyncPath
from .report import SyncReport
from .scan_policies import DEFAULT_SCAN_MANAGER, ScanPoliciesManager

from ..utils import fix_windows_path_limit, get_file_mtime, is_file_readable
from .exception import EmptyDirectory, EnvironmentEncodingError, NotADirectory, UnableToCreateDirectory, UnsupportedFilename
from .path import B2Path, LocalPath
from .policies import DEFAULT_SCAN_MANAGER, ScanPoliciesManager
from .report import Report

DRIVE_MATCHER = re.compile(r"^([A-Za-z]):([/\\])")
ABSOLUTE_PATH_MATCHER = re.compile(r"^(/)|^(\\)")
Expand Down Expand Up @@ -50,7 +50,7 @@ class AbstractFolder(metaclass=ABCMeta):
"""

@abstractmethod
def all_files(self, reporter, policies_manager=DEFAULT_SCAN_MANAGER):
def all_files(self, reporter: Report, policies_manager=DEFAULT_SCAN_MANAGER):
"""
Return an iterator over all of the files in the folder, in
the order that B2 uses.
Expand Down Expand Up @@ -121,7 +121,7 @@ def folder_type(self):
"""
return 'local'

def all_files(self, reporter, policies_manager=DEFAULT_SCAN_MANAGER):
def all_files(self, reporter: Report, policies_manager=DEFAULT_SCAN_MANAGER):
"""
Yield all files.

Expand All @@ -148,7 +148,7 @@ def make_full_path(self, file_name):

# Ensure the new full_path is inside the self.root directory
if common_prefix != self.root:
raise UnSyncableFilename("illegal file name", full_path)
raise UnsupportedFilename("illegal file name", full_path)

return full_path

Expand All @@ -174,14 +174,14 @@ def ensure_non_empty(self):
raise EmptyDirectory(self.root)

def _walk_relative_paths(
self, local_dir: str, relative_dir_path: str, reporter: SyncReport,
self, local_dir: str, relative_dir_path: str, reporter,
policies_manager: ScanPoliciesManager
):
"""
Yield a File object for each of the files anywhere under this folder, in the
order they would appear in B2, unless the path is excluded by policies manager.

:param relative_dir_path: the path of this dir relative to the sync point, or '' if at sync point
:param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
"""
if not isinstance(local_dir, str):
raise ValueError('folder path should be unicode: %s' % repr(local_dir))
Expand All @@ -208,15 +208,15 @@ def _walk_relative_paths(
name = self._handle_non_unicode_file_name(name)

if '/' in name:
raise UnSyncableFilename(
"sync does not support file names that include '/'",
raise UnsupportedFilename(
"scan does not support file names that include '/'",
"%s in dir %s" % (name, local_dir)
)

local_path = os.path.join(local_dir, name)
relative_file_path = join_b2_path(
relative_dir_path, name
) # file path relative to the sync point
) # file path relative to the scan point

# Skip broken symlinks or other inaccessible files
if not is_file_readable(local_path, reporter):
Expand Down Expand Up @@ -251,17 +251,17 @@ def _walk_relative_paths(
file_mod_time = get_file_mtime(local_path)
file_size = os.path.getsize(local_path)

local_sync_path = LocalSyncPath(
local_scan_path = LocalPath(
absolute_path=self.make_full_path(relative_file_path),
relative_path=relative_file_path,
mod_time=file_mod_time,
size=file_size,
)

if policies_manager.should_exclude_local_path(local_sync_path):
if policies_manager.should_exclude_local_path(local_scan_path):
continue

yield local_sync_path
yield local_scan_path

@classmethod
def _handle_non_unicode_file_name(cls, name):
Expand Down Expand Up @@ -312,7 +312,7 @@ def __init__(self, bucket_name, folder_name, api):
self.prefix = '' if self.folder_name == '' else self.folder_name + '/'

def all_files(
self, reporter: SyncReport, policies_manager: ScanPoliciesManager = DEFAULT_SCAN_MANAGER
self, reporter: Report, policies_manager: ScanPoliciesManager = DEFAULT_SCAN_MANAGER
):
"""
Yield all files.
Expand Down Expand Up @@ -346,7 +346,7 @@ def all_files(
self._validate_file_name(file_name)

if current_name != file_name and current_name is not None and current_versions:
yield B2SyncPath(
yield B2Path(
relative_path=current_name,
selected_version=current_versions[0],
all_versions=current_versions
Expand All @@ -357,7 +357,7 @@ def all_files(
current_versions.append(file_version)

if current_name is not None and current_versions:
yield B2SyncPath(
yield B2Path(
relative_path=current_name,
selected_version=current_versions[0],
all_versions=current_versions
Expand All @@ -374,18 +374,18 @@ def get_file_versions(self):
def _validate_file_name(self, file_name):
# Do not allow relative paths in file names
if RELATIVE_PATH_MATCHER.search(file_name):
raise UnSyncableFilename(
"sync does not support file names that include relative paths", file_name
raise UnsupportedFilename(
"scan does not support file names that include relative paths", file_name
)
# Do not allow absolute paths in file names
if ABSOLUTE_PATH_MATCHER.search(file_name):
raise UnSyncableFilename(
"sync does not support file names with absolute paths", file_name
raise UnsupportedFilename(
"scan does not support file names with absolute paths", file_name
)
# On Windows, do not allow drive letters in file names
if platform.system() == "Windows" and DRIVE_MATCHER.search(file_name):
raise UnSyncableFilename(
"sync does not support file names with drive letters", file_name
raise UnsupportedFilename(
"scan does not support file names with drive letters", file_name
)

def folder_type(self):
Expand Down
9 changes: 4 additions & 5 deletions b2sdk/sync/folder_parser.py → b2sdk/scan/folder_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
######################################################################
#
# File: b2sdk/sync/folder_parser.py
# File: b2sdk/scan/folder_parser.py
#
# Copyright 2019 Backblaze Inc. All Rights Reserved.
#
Expand All @@ -12,13 +12,12 @@
from .folder import B2Folder, LocalFolder


def parse_sync_folder(folder_name, api, local_folder_class=LocalFolder, b2_folder_class=B2Folder):
def parse_folder(folder_name, api, local_folder_class=LocalFolder, b2_folder_class=B2Folder):
"""
Take either a local path, or a B2 path, and returns a Folder
object for it.

B2 paths look like: b2://bucketName/path/name. The '//' is optional,
because the previous sync command didn't use it.
B2 paths look like: b2://bucketName/path/name. The '//' is optional.

Anything else is treated like a local folder.

Expand Down Expand Up @@ -52,4 +51,4 @@ def _parse_bucket_and_folder(bucket_and_path, api, b2_folder_class):
(bucket_name, folder_name) = bucket_and_path.split('/', 1)
if folder_name.endswith('/'):
folder_name = folder_name[:-1]
return b2_folder_class(bucket_name, folder_name, api)
return b2_folder_class(bucket_name, folder_name, api)
Loading