-
Notifications
You must be signed in to change notification settings - Fork 48
Add a chunk_size setting #1318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a chunk_size setting #1318
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| Change the default chunking value to "infinite". If users wants to, they can still specify a value based on their needs. | ||
| Removed the default `chunk_size` value. If necessary, a suitable value should be provided with the call or configured for the profile. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Allow to specify `None` for the `chunk_size` of content upload commands to disable chunking. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,12 +45,15 @@ class PulpArtifactContext(PulpEntityContext): | |
| ID_PREFIX = "artifacts" | ||
|
|
||
| def upload( | ||
| self, file: t.IO[bytes], chunk_size: int = 1000000, sha256: str | None = None | ||
| self, | ||
| file: t.IO[bytes], | ||
| chunk_size: int | None = None, | ||
| sha256: str | None = None, | ||
| ) -> t.Any: | ||
| size = os.path.getsize(file.name) | ||
|
|
||
| sha256_hasher = hashlib.sha256() | ||
| for chunk in iter(lambda: file.read(chunk_size), b""): | ||
| for chunk in iter(lambda: file.read(10_000_000), b""): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any particular reason for 10Mb here? Or is it just "a reasonable amount at a time" to hash?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not any sort of chunk to be used on the network. Here we only hash the file to secure the upload later.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right - I was just curious if 10Mb was "magic" in some way, or just "a reasonable amount" :) |
||
| sha256_hasher.update(chunk) | ||
| sha256_digest = sha256_hasher.hexdigest() | ||
| file.seek(0) | ||
|
|
@@ -71,8 +74,8 @@ def upload( | |
| self._entity = {"pulp_href": "<FAKE_ENTITY>", "sha256": sha256, "size": size} | ||
| self._entity_lookup = {} | ||
| return self._entity["pulp_href"] | ||
| if chunk_size > size: | ||
| # if chunk_size is bigger than the file size, just upload it directly | ||
| if chunk_size is None or chunk_size > size: | ||
| # upload it directly | ||
| artifact: dict[str, t.Any] = self.create({"sha256": sha256_digest, "file": file}) | ||
| self.pulp_href = artifact["pulp_href"] | ||
| return artifact["pulp_href"] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,18 @@ | |
| from pulp_glue.common.i18n import get_translation | ||
| from pulp_glue.common.openapi import AuthProviderBase | ||
|
|
||
| if sys.version_info >= (3, 13): | ||
| from warnings import deprecated | ||
| else: | ||
| T = t.TypeVar("T") | ||
|
|
||
| def deprecated(s: str) -> t.Callable[[T], T]: | ||
| def _inner(f: T) -> T: | ||
| return f | ||
|
|
||
| return _inner | ||
|
|
||
|
|
||
| try: | ||
| from pygments import highlight | ||
| from pygments.formatters import Terminal256Formatter | ||
|
|
@@ -67,7 +79,6 @@ def _unset(value: t.Any) -> bool: | |
| translation = get_translation(__package__) | ||
| _ = translation.gettext | ||
|
|
||
|
|
||
| _AnyCallable = t.Callable[..., t.Any] | ||
| FC = t.TypeVar("FC", bound=_AnyCallable | click.Command) | ||
|
|
||
|
|
@@ -156,6 +167,7 @@ def __init__( | |
| password: str | None = None, | ||
| oauth2_client_id: str | None = None, | ||
| oauth2_client_secret: str | None = None, | ||
| chunk_size: int | None = None, | ||
| ) -> None: | ||
| self.username = username | ||
| self.password = password | ||
|
|
@@ -172,6 +184,7 @@ def __init__( | |
| background_tasks=background_tasks, | ||
| timeout=timeout, | ||
| domain=domain, | ||
| chunk_size=chunk_size, | ||
| ) | ||
| self.format = format | ||
|
|
||
|
|
@@ -717,9 +730,9 @@ def _callback( | |
| units = {"B": 1, "KB": 10**3, "MB": 10**6, "GB": 10**9, "TB": 10**12} | ||
|
|
||
|
|
||
| def parse_size_callback(ctx: click.Context, param: click.Parameter, value: str | None) -> int: | ||
| def parse_size(value: str | None) -> int | None: | ||
| if value is None: | ||
| return 8 * 10**9 | ||
| return None | ||
| size = value.strip().upper() | ||
| match = re.match(r"^([0-9]+)\s*([KMGT]?B)?$", size) | ||
| if not match: | ||
|
|
@@ -728,6 +741,18 @@ def parse_size_callback(ctx: click.Context, param: click.Parameter, value: str | | |
| return int(float(number) * units[unit]) | ||
|
|
||
|
|
||
| def chunk_size_callback( | ||
| ctx: click.Context, param: click.Parameter, value: str | None | ||
| ) -> int | None: | ||
| if value == "": | ||
| # Actually override the default. | ||
| return None | ||
|
Comment on lines
+747
to
+749
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we push these into parse_size()?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We commonly "use" |
||
| return parse_size(value) | ||
|
|
||
|
|
||
| parse_size_callback = deprecated("Use 'chunk_size_callback' instead.")(chunk_size_callback) | ||
|
|
||
|
|
||
| def null_callback(ctx: click.Context, param: click.Parameter, value: str | None) -> str | None: | ||
| if value == "": | ||
| return "null" | ||
|
|
@@ -1220,7 +1245,7 @@ def _type_callback(ctx: click.Context, param: click.Parameter, value: str | None | |
| "--chunk-size", | ||
| help=_("Chunk size to break up {entity} into. Defaults to not chunking at all."), | ||
| default=None, | ||
| callback=parse_size_callback, | ||
| callback=chunk_size_callback, | ||
| ) | ||
|
|
||
| pulp_created_gte_option = pulp_option( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tell me what the intent is here? I'm missing something
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is unrelated. But it's a safeguard that the repository this content context is tied to here belongs to the same server (pulp_ctx).