From 1fa6ee295484fef141bf16cf1ec3618506c17c7e Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Mon, 16 Feb 2026 18:52:06 +0100 Subject: [PATCH 1/4] Add lambda-streaming-download-s3 pattern --- lambda-s3-download/README.md | 96 +++++++++++++++++++++++++ lambda-s3-download/example-pattern.json | 60 ++++++++++++++++ lambda-s3-download/src/app.py | 66 +++++++++++++++++ lambda-s3-download/src/requirements.txt | 6 ++ lambda-s3-download/template.yaml | 31 ++++++++ 5 files changed, 259 insertions(+) create mode 100644 lambda-s3-download/README.md create mode 100644 lambda-s3-download/example-pattern.json create mode 100644 lambda-s3-download/src/app.py create mode 100644 lambda-s3-download/src/requirements.txt create mode 100644 lambda-s3-download/template.yaml diff --git a/lambda-s3-download/README.md b/lambda-s3-download/README.md new file mode 100644 index 000000000..72289449c --- /dev/null +++ b/lambda-s3-download/README.md @@ -0,0 +1,96 @@ +# Lambda S3 Download + +This pattern deploys a Lambda function that downloads a file from a URL and uploads it to an S3 bucket using multipart upload. It streams the file in configurable chunks through `/tmp`, making it capable of handling files larger than Lambda's memory and storage limits. + +Important: this application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the [AWS Pricing page](https://aws.amazon.com/pricing/) for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. + +## Requirements + +* [Create an AWS account](https://portal.aws.amazon.com/gp/aws/developer/registration/index.html) if you do not already have one and log in. The IAM user that you use must have sufficient permissions to make necessary AWS service calls and manage AWS resources. 
+* [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) installed and configured +* [Git Installed](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) +* [AWS Serverless Application Model](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-sam-cli-install.html) (AWS SAM) installed + +## Deployment Instructions + +1. Create a new directory, navigate to that directory in a terminal and clone the GitHub repository: + ``` + git clone https://github.com/aws-samples/serverless-patterns + ``` +1. Change directory to the pattern directory: + ``` + cd serverless-patterns/lambda-s3-download + ``` +1. Build the application: + ``` + sam build + ``` +1. Deploy the application: + ``` + sam deploy --guided + ``` +1. During the prompts: + * Enter a stack name + * Enter the desired AWS Region + * Enter the target S3 bucket name (the bucket must already exist) + * Allow SAM CLI to create IAM roles with the required permissions + + After you have run `sam deploy --guided` once and saved the arguments to a configuration file (samconfig.toml), you can run `sam deploy` in the future to reuse these defaults. + +1. Note the outputs from the SAM deployment process. These contain the resource names and/or ARNs which are used for testing. + +## How it works + +The Lambda function: + +1. Receives a download URL and filename via the event payload +2. Initiates an S3 multipart upload with SHA256 checksums +3. Streams the file from the URL in chunks (default 128 MB), writing each chunk to `/tmp` and uploading it as a multipart part +4. Cleans up each chunk from `/tmp` after uploading to stay within the 10 GB ephemeral storage limit +5. Completes the multipart upload and returns the S3 object checksum +6. If any step fails, aborts the multipart upload to avoid orphaned parts + +The function is configured with a 15-minute timeout, 1 GB memory, and 10 GB ephemeral storage. 
+ +## Testing + +Invoke the Lambda function with a test event: + +```bash +aws lambda invoke \ + --function-name FUNCTION_NAME \ + --cli-binary-format raw-in-base64-out \ + --payload '{ + "download_url": "https://example.com/file.zip", + "download_filename": "file.zip" + }' \ + response.json +``` + +Optional event parameters: + +| Parameter | Description | Default | +|---|---|---| +| `target_bucket` | S3 bucket name (overrides the deployed parameter) | Value from template parameter | +| `target_bucket_region` | S3 bucket region | Lambda's region | +| `chunk_size_mb` | Size of each download chunk in MB (clamped between 5 and 5120) | 128 | + +## Known Limitations + +- The Lambda function has a 15-minute maximum timeout. If the download and upload combined take longer than that, the function will be killed mid-stream and the multipart upload will be left incomplete. Consider setting an [S3 lifecycle rule](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpu-abort-incomplete-mpu-lifecycle-config.html) on the target bucket to auto-clean incomplete multipart uploads. +- The `download_filename` should be a flat filename (e.g. `file.zip`). If it contains slashes (e.g. `path/to/file.zip`), the temporary file path in `/tmp` will include subdirectories that may not exist, causing a write failure. + +## Cleanup + +1. Delete the stack + ```bash + aws cloudformation delete-stack --stack-name STACK_NAME + ``` +1. Confirm the stack has been deleted + ```bash + aws cloudformation list-stacks --query "StackSummaries[?contains(StackName,'STACK_NAME')].StackStatus" + ``` +---- +Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ +SPDX-License-Identifier: MIT-0 diff --git a/lambda-s3-download/example-pattern.json b/lambda-s3-download/example-pattern.json new file mode 100644 index 000000000..fc07c7fe1 --- /dev/null +++ b/lambda-s3-download/example-pattern.json @@ -0,0 +1,60 @@ +{ + "title": "Lambda S3 Download", + "description": "A Lambda function that downloads a file from a URL and uploads it to S3 using multipart upload with SHA256 checksums.", + "language": "Python", + "level": "300", + "framework": "SAM", + "introBox": { + "headline": "How it works", + "text": [ + "This pattern deploys a Lambda function that streams a file from a URL and uploads it to an S3 bucket using multipart upload.", + "The file is downloaded in configurable chunks (default 128 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.", + "SHA256 checksums are calculated for each part and verified on completion. If any step fails, the multipart upload is automatically aborted to avoid orphaned parts." + ] + }, + "gitHub": { + "template": { + "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/lambda-s3-download", + "templateURL": "serverless-patterns/lambda-s3-download", + "projectFolder": "lambda-s3-download", + "templateFile": "template.yaml" + } + }, + "resources": { + "bullets": [ + { + "text": "S3 Multipart Upload Overview", + "link": "https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html" + }, + { + "text": "AWS Lambda - Configuring Ephemeral Storage", + "link": "https://docs.aws.amazon.com/lambda/latest/dg/configuration-ephemeral-storage.html" + } + ] + }, + "deploy": { + "text": [ + "sam build", + "sam deploy --guided" + ] + }, + "testing": { + "text": [ + "See the GitHub repo for detailed testing instructions." 
# Bytes requested from the HTTP stream per read. Keeping this small bounds the
# function's memory use independently of the (much larger) S3 part size.
_READ_SIZE = 8 * 1024 * 1024

# (connect, read) timeouts in seconds passed to requests, so a stalled source
# raises inside the handler (and the upload is aborted) instead of hanging.
_HTTP_TIMEOUT = (10, 60)


def _upload_part(s3, part_file, bucket, key, upload_id, part_number):
    """Upload the bytes buffered in ``part_file`` as one part, then empty the file.

    Returns the part descriptor expected in ``MultipartUpload['Parts']``.
    """
    part_file.seek(0)
    response = s3.upload_part(
        Body=part_file,
        Bucket=bucket,
        Key=key,
        PartNumber=part_number,
        UploadId=upload_id,
        ChecksumAlgorithm="SHA256",
    )
    # Reset the scratch file so the next part starts from an empty buffer.
    part_file.seek(0)
    part_file.truncate()
    return {
        "ETag": response["ETag"],
        "ChecksumSHA256": response["ChecksumSHA256"],
        "PartNumber": part_number,
    }


def _stream_parts(response, s3, part_path, bucket, key, upload_id, part_size,
                  read_size=_READ_SIZE):
    """Spool ``response`` through ``part_path`` and upload it in ``part_size`` parts.

    The stream is read in pieces of at most ``read_size`` bytes and sliced by
    byte count, so every part except the last is exactly ``part_size`` bytes no
    matter how large the pieces yielded by ``iter_content`` are.

    Returns the list of part descriptors in upload order (empty when the
    response carried no data).
    """
    parts = []
    buffered = 0
    with part_path.open("w+b") as part_file:
        for piece in response.iter_content(chunk_size=min(read_size, part_size)):
            view = memoryview(piece)  # slice without copying
            while len(view) > 0:
                take = min(len(view), part_size - buffered)
                part_file.write(view[:take])
                buffered += take
                view = view[take:]
                if buffered == part_size:
                    parts.append(
                        _upload_part(s3, part_file, bucket, key, upload_id, len(parts) + 1)
                    )
                    buffered = 0
        if buffered:
            # Trailing remainder: the final part may be smaller than part_size.
            parts.append(
                _upload_part(s3, part_file, bucket, key, upload_id, len(parts) + 1)
            )
    return parts


def lambda_handler(event, context):
    """Download ``event['download_url']`` and store it in S3 via multipart upload.

    Event keys:
        download_url (required): URL to fetch.
        download_filename (required): S3 object key to write.
        target_bucket (optional): bucket name; falls back to the
            ``TARGET_BUCKET`` environment variable.
        target_bucket_region (optional): region for the S3 client; falls back
            to the ``AWS_REGION`` environment variable.
        chunk_size_mb (optional): part size in MB, clamped to 5..5120
            (default 128).

    Returns a dict with ``statusCode`` and a JSON ``body``: 200 with bucket,
    key, checksum and part count on success; 400 when ``download_filename`` is
    empty; 500 with the error text when the download or upload fails.

    Raises:
        KeyError: if ``download_url`` or ``download_filename`` is missing, or
            if no bucket is given and ``TARGET_BUCKET`` is unset.
        ValueError: if ``chunk_size_mb`` cannot be converted to an integer.
    """
    # `or` instead of a .get() default: a default argument is evaluated eagerly
    # and would raise KeyError for an unset TARGET_BUCKET even when the event
    # supplies the bucket.
    target_bucket = event.get("target_bucket") or os.environ["TARGET_BUCKET"]
    target_bucket_region = event.get("target_bucket_region") or os.environ.get("AWS_REGION")

    download_url = event["download_url"]
    download_filename = event["download_filename"]
    if not download_filename:
        # Reject before any AWS call: an empty key cannot name an S3 object.
        return {
            "statusCode": 400,
            "body": json.dumps({"message": "download_filename must not be empty"})
        }

    # Cap part size at 5120 MB and floor it at 5 MB to stay within the S3 part size limits.
    chunk_size_mb = min(max(int(event.get("chunk_size_mb", 128)), 5), 5120)
    part_size = chunk_size_mb * 1024 * 1024

    # Only the final path component names the scratch file, so keys that
    # contain slashes do not require sub-directories under /tmp.
    part_path = Path("/tmp", Path(download_filename).name + ".part")

    # open a multipart s3 upload request.
    s3 = boto3.client("s3", region_name=target_bucket_region)
    upload_request = s3.create_multipart_upload(Bucket=target_bucket, Key=download_filename, ChecksumAlgorithm="SHA256")
    upload_id = upload_request["UploadId"]
    completed = False

    try:
        with requests.get(download_url, stream=True, timeout=_HTTP_TIMEOUT) as download_request:
            # Fail on 4xx/5xx instead of storing the error page as the object.
            download_request.raise_for_status()
            parts = _stream_parts(download_request, s3, part_path, target_bucket, download_filename, upload_id, part_size)

        if not parts:
            raise ValueError("download returned no data")

        s3.complete_multipart_upload(Bucket=target_bucket, Key=download_filename, MultipartUpload={'Parts': parts}, UploadId=upload_id)
        completed = True
        objectSummary = s3.get_object_attributes(Bucket=target_bucket, Key=download_filename, ObjectAttributes=['Checksum'])

        return {
            "statusCode": 200,
            "body": json.dumps({
                "message": f"{download_filename} uploaded successfully",
                "bucket": target_bucket,
                "key": download_filename,
                "checksum_sha256": objectSummary["Checksum"]["ChecksumSHA256"],
                "parts": len(parts)
            })
        }

    except Exception as e:
        # Abort only while the upload is still open; once it has completed
        # there is nothing left to abort.
        if not completed:
            s3.abort_multipart_upload(Bucket=target_bucket, Key=download_filename, UploadId=upload_id)
        return {
            "statusCode": 500,
            "body": json.dumps({"message": f"Download/Upload failed: {str(e)}"})
        }

    finally:
        # Remove the scratch file on success and on failure alike.
        part_path.unlink(missing_ok=True)
+boto3 +json +os +Path +requests + diff --git a/lambda-s3-download/template.yaml b/lambda-s3-download/template.yaml new file mode 100644 index 000000000..8c40ab8b9 --- /dev/null +++ b/lambda-s3-download/template.yaml @@ -0,0 +1,31 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Lambda function that downloads a file from a URL and uploads it to S3 using multipart upload + +Parameters: + TargetBucketName: + Type: String + Description: Name of the S3 bucket to upload files to + +Resources: + DownloadFunction: + Type: AWS::Serverless::Function + Properties: + Handler: app.lambda_handler + Runtime: python3.12 + CodeUri: src/ + Timeout: 900 + MemorySize: 1024 + EphemeralStorage: + Size: 10240 + Environment: + Variables: + TARGET_BUCKET: !Ref TargetBucketName + Policies: + - S3CrudPolicy: + BucketName: !Ref TargetBucketName + +Outputs: + DownloadFunctionArn: + Description: Lambda function ARN + Value: !GetAtt DownloadFunction.Arn From 25fd15eaa4b5b3973dbafd50a5e246f7e089b661 Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Mon, 16 Feb 2026 19:12:51 +0100 Subject: [PATCH 2/4] feat: Add arm64 support, default chunk size to 512 MB, extract filename from URL fallback --- lambda-s3-download/README.md | 5 +++-- lambda-s3-download/example-pattern.json | 2 +- lambda-s3-download/src/app.py | 5 +++-- lambda-s3-download/src/requirements.txt | 5 ----- lambda-s3-download/template.yaml | 2 ++ 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lambda-s3-download/README.md b/lambda-s3-download/README.md index 72289449c..cac3c1c75 100644 --- a/lambda-s3-download/README.md +++ b/lambda-s3-download/README.md @@ -45,7 +45,7 @@ The Lambda function: 1. Receives a download URL and filename via the event payload 2. Initiates an S3 multipart upload with SHA256 checksums -3. Streams the file from the URL in chunks (default 128 MB), writing each chunk to `/tmp` and uploading it as a multipart part +3. 
Streams the file from the URL in chunks (default 512 MB), writing each chunk to `/tmp` and uploading it as a multipart part 4. Cleans up each chunk from `/tmp` after uploading to stay within the 10 GB ephemeral storage limit 5. Completes the multipart upload and returns the S3 object checksum 6. If any step fails, aborts the multipart upload to avoid orphaned parts @@ -73,12 +73,13 @@ Optional event parameters: |---|---|---| | `target_bucket` | S3 bucket name (overrides the deployed parameter) | Value from template parameter | | `target_bucket_region` | S3 bucket region | Lambda's region | -| `chunk_size_mb` | Size of each download chunk in MB (clamped between 5 and 5120) | 128 | +| `chunk_size_mb` | Size of each download chunk in MB (clamped between 5 and 5120) | 512 | ## Known Limitations - The Lambda function has a 15-minute maximum timeout. If the download and upload combined take longer than that, the function will be killed mid-stream and the multipart upload will be left incomplete. Consider setting an [S3 lifecycle rule](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpu-abort-incomplete-mpu-lifecycle-config.html) on the target bucket to auto-clean incomplete multipart uploads. - The `download_filename` should be a flat filename (e.g. `file.zip`). If it contains slashes (e.g. `path/to/file.zip`), the temporary file path in `/tmp` will include subdirectories that may not exist, causing a write failure. +- The maximum downloadable file size is limited by the 15-minute Lambda timeout, not by S3 (which supports up to 5 TB via multipart upload with 10,000 parts). In practice, Lambda can usually download roughly 55-110 GB in 15 minutes depending on network speed between Lambda and the source URL, so your mileage may vary. At the default chunk size of 512 MB, the 10,000 parts limit allows up to ~5 TB. 
## Cleanup diff --git a/lambda-s3-download/example-pattern.json b/lambda-s3-download/example-pattern.json index fc07c7fe1..c4678eab6 100644 --- a/lambda-s3-download/example-pattern.json +++ b/lambda-s3-download/example-pattern.json @@ -8,7 +8,7 @@ "headline": "How it works", "text": [ "This pattern deploys a Lambda function that streams a file from a URL and uploads it to an S3 bucket using multipart upload.", - "The file is downloaded in configurable chunks (default 128 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.", + "The file is downloaded in configurable chunks (default 512 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.", "SHA256 checksums are calculated for each part and verified on completion. If any step fails, the multipart upload is automatically aborted to avoid orphaned parts." 
] }, diff --git a/lambda-s3-download/src/app.py b/lambda-s3-download/src/app.py index 733ff87ec..fd6670a70 100644 --- a/lambda-s3-download/src/app.py +++ b/lambda-s3-download/src/app.py @@ -3,6 +3,7 @@ import json import os from pathlib import Path +from urllib.parse import urlparse def lambda_handler(event, context): @@ -11,11 +12,11 @@ def lambda_handler(event, context): target_bucket_region = event.get("target_bucket_region", os.environ.get("AWS_REGION")) download_url = event["download_url"] - download_filename = event["download_filename"] + download_filename = event.get("download_filename", urlparse(download_url).path.split("/")[-1]) # Cap chunk size under 5 GB to be inside S3 max part size and not exhaust max Lambda memory # Floor chunk size at 5 MB to fit the S3 minimum part size - chunk_size_mb = min(max(int(event.get("chunk_size_mb", 128)), 5), 5120) + chunk_size_mb = min(max(int(event.get("chunk_size_mb", 512)), 5), 5120) # open a multipart s3 upload request. s3 = boto3.client("s3", region_name = target_bucket_region) diff --git a/lambda-s3-download/src/requirements.txt b/lambda-s3-download/src/requirements.txt index 245db934d..f2293605c 100644 --- a/lambda-s3-download/src/requirements.txt +++ b/lambda-s3-download/src/requirements.txt @@ -1,6 +1 @@ -boto3 -json -os -Path requests - diff --git a/lambda-s3-download/template.yaml b/lambda-s3-download/template.yaml index 8c40ab8b9..97dfd0a38 100644 --- a/lambda-s3-download/template.yaml +++ b/lambda-s3-download/template.yaml @@ -14,6 +14,8 @@ Resources: Handler: app.lambda_handler Runtime: python3.12 CodeUri: src/ + Architectures: + - arm64 Timeout: 900 MemorySize: 1024 EphemeralStorage: From 4e11128c08a146a949d0ed72139c66e0eabc30c5 Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Mon, 16 Feb 2026 19:18:13 +0100 Subject: [PATCH 3/4] docs: Update README with additional known limitations --- lambda-s3-download/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/lambda-s3-download/README.md 
b/lambda-s3-download/README.md index cac3c1c75..bf38efc49 100644 --- a/lambda-s3-download/README.md +++ b/lambda-s3-download/README.md @@ -80,6 +80,7 @@ Optional event parameters: - The Lambda function has a 15-minute maximum timeout. If the download and upload combined take longer than that, the function will be killed mid-stream and the multipart upload will be left incomplete. Consider setting an [S3 lifecycle rule](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpu-abort-incomplete-mpu-lifecycle-config.html) on the target bucket to auto-clean incomplete multipart uploads. - The `download_filename` should be a flat filename (e.g. `file.zip`). If it contains slashes (e.g. `path/to/file.zip`), the temporary file path in `/tmp` will include subdirectories that may not exist, causing a write failure. - The maximum downloadable file size is limited by the 15-minute Lambda timeout, not by S3 (which supports up to 5 TB via multipart upload with 10,000 parts). In practice, Lambda can usually download roughly 55-110 GB in 15 minutes depending on network speed between Lambda and the source URL, so your mileage may vary. At the default chunk size of 512 MB, the 10,000 parts limit allows up to ~5 TB. +- This pattern always uses multipart upload, even for small files. For files under 5 MB, this results in 3 PUT requests (CreateMultipartUpload + UploadPart + CompleteMultipartUpload) instead of a single PutObject call. The cost difference in that case is negligible (fractions of a cent), but can compound if done often enough. 
## Cleanup From 5eecb749b5473edc7e17a6e8c2858675065c353d Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Thu, 26 Feb 2026 09:45:14 +0100 Subject: [PATCH 4/4] Update SAM template and function code --- lambda-s3-download/README.md | 6 +++--- lambda-s3-download/example-pattern.json | 6 +++--- lambda-s3-download/src/app.py | 2 +- lambda-s3-download/template.yaml | 6 ++---- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/lambda-s3-download/README.md b/lambda-s3-download/README.md index bf38efc49..7a51fc77c 100644 --- a/lambda-s3-download/README.md +++ b/lambda-s3-download/README.md @@ -1,6 +1,6 @@ -# Lambda S3 Download +# AWS Lambda to Amazon S3 — URL File Downloader -This pattern deploys a Lambda function that downloads a file from a URL and uploads it to an S3 bucket using multipart upload. It streams the file in configurable chunks through `/tmp`, making it capable of handling files larger than Lambda's memory and storage limits. +This pattern deploys an AWS Lambda function that downloads a file from a URL and stores it in Amazon S3 using multipart upload. It streams the file in configurable chunks through `/tmp`, making it capable of handling files larger than Lambda's memory and storage limits. Important: this application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the [AWS Pricing page](https://aws.amazon.com/pricing/) for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. @@ -93,6 +93,6 @@ Optional event parameters: aws cloudformation list-stacks --query "StackSummaries[?contains(StackName,'STACK_NAME')].StackStatus" ``` ---- -Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
SPDX-License-Identifier: MIT-0 diff --git a/lambda-s3-download/example-pattern.json b/lambda-s3-download/example-pattern.json index c4678eab6..d741958c3 100644 --- a/lambda-s3-download/example-pattern.json +++ b/lambda-s3-download/example-pattern.json @@ -1,13 +1,13 @@ { - "title": "Lambda S3 Download", - "description": "A Lambda function that downloads a file from a URL and uploads it to S3 using multipart upload with SHA256 checksums.", + "title": "AWS Lambda to Amazon S3 — URL File Downloader", + "description": "An AWS Lambda function that downloads a file from a URL and stores it in Amazon S3 using multipart upload with SHA256 checksums.", "language": "Python", "level": "300", "framework": "SAM", "introBox": { "headline": "How it works", "text": [ - "This pattern deploys a Lambda function that streams a file from a URL and uploads it to an S3 bucket using multipart upload.", + "This pattern deploys an AWS Lambda function that streams a file from a URL and stores it in Amazon S3 using multipart upload.", "The file is downloaded in configurable chunks (default 512 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.", "SHA256 checksums are calculated for each part and verified on completion. If any step fails, the multipart upload is automatically aborted to avoid orphaned parts." 
] diff --git a/lambda-s3-download/src/app.py b/lambda-s3-download/src/app.py index fd6670a70..575a145ed 100644 --- a/lambda-s3-download/src/app.py +++ b/lambda-s3-download/src/app.py @@ -49,7 +49,7 @@ def lambda_handler(event, context): return { "statusCode": 200, "body": json.dumps({ - "message": f"{download_filename} uploaded successfully", + "message": f"{download_filename} downloaded and stored successfully", "bucket": target_bucket, "key": download_filename, "checksum_sha256": objectSummary["Checksum"]["ChecksumSHA256"], diff --git a/lambda-s3-download/template.yaml b/lambda-s3-download/template.yaml index 97dfd0a38..7ba36b631 100644 --- a/lambda-s3-download/template.yaml +++ b/lambda-s3-download/template.yaml @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: '2010-09-09' Transform: AWS::Serverless-2016-10-31 -Description: Lambda function that downloads a file from a URL and uploads it to S3 using multipart upload +Description: AWS Lambda function that downloads a file from a URL and stores it in Amazon S3 using multipart upload Parameters: TargetBucketName: @@ -14,8 +14,6 @@ Resources: Handler: app.lambda_handler Runtime: python3.12 CodeUri: src/ - Architectures: - - arm64 Timeout: 900 MemorySize: 1024 EphemeralStorage: @@ -29,5 +27,5 @@ Resources: Outputs: DownloadFunctionArn: - Description: Lambda function ARN + Description: Serverless Downloader Lambda function ARN Value: !GetAtt DownloadFunction.Arn