diff --git a/README.md b/README.md index 418a5c7..7905ba2 100644 --- a/README.md +++ b/README.md @@ -82,9 +82,22 @@ Example (sanitized): "token_provider_url": "https://", "token_public_keys_url": "https:///token/public-keys", "kafka_bootstrap_server": "broker1:9092,broker2:9092", - "event_bus_arn": "arn:aws:events:region:acct:event-bus/your-bus" + "event_bus_arn": "arn:aws:events:region:acct:event-bus/your-bus", + "ssl_ca_bundle": "/path/to/ca-bundle.pem" } ``` + +Configuration keys: +- `access_config` – local file path or S3 URI for the access control map. +- `token_provider_url` – external URL for obtaining JWT tokens. +- `token_public_keys_url` – endpoint serving JWT verification public keys (RS256). +- `kafka_bootstrap_server` – comma-separated Kafka broker addresses. +- `event_bus_arn` – AWS EventBridge event bus ARN used by the EventBridge writer. +- `ssl_ca_bundle` (optional) – controls TLS certificate verification for S3 access and token public key requests. Accepted values: + - `true` – default; verifies certificates against the default CA bundle used by the HTTP libraries + - `false` – disables verification; not recommended for production + - `"/path/to/ca-bundle.pem"` – path to a custom CA bundle file (PEM) + Supporting configs: - `access.json` – map: topicName -> array of authorized subjects (JWT `sub`). May reside locally or at S3 path referenced by `access_config`. - `topic_*.json` – each file contains a JSON Schema for a topic. In the current code these are explicitly loaded inside `event_gate_lambda.py`. (Future enhancement: auto-discover or index file.) 
diff --git a/conf/config.json b/conf/config.json index c35e61c..9770b27 100644 --- a/conf/config.json +++ b/conf/config.json @@ -3,5 +3,6 @@ "token_provider_url": "https://", "token_public_keys_url": "https://", "kafka_bootstrap_server": "localhost:9092", - "event_bus_arn": "arn:aws:events:" + "event_bus_arn": "arn:aws:events:", + "ssl_ca_bundle": "/path/to/ca-bundle.pem" } \ No newline at end of file diff --git a/scripts/notebook.ipynb b/scripts/notebook.ipynb deleted file mode 100644 index e493a97..0000000 --- a/scripts/notebook.ipynb +++ /dev/null @@ -1,170 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "b0ddfccc-0a08-4c37-89c0-fa144ef516e3", - "metadata": {}, - "outputs": [], - "source": [ - "# Set postgres secret\n", - "import os\n", - "\n", - "os.environ[\"POSTGRES_SECRET_NAME\"] = \"\"\n", - "os.environ[\"POSTGRES_SECRET_REGION\"] = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8e37945-f8c5-4769-bdde-226edeb8465f", - "metadata": {}, - "outputs": [], - "source": [ - "# Load lambda core\n", - "# Jump out of the \"Scripts\" box for the lambda source\n", - "import sys\n", - "import os\n", - "os.environ[\"LOG_LEVEL\"] = \"DEBUG\"\n", - "parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))\n", - "sys.path.insert(0, parent_dir)\n", - "if not os.path.exists('src'):\n", - " os.chdir('..')\n", - "import src.event_gate_lambda" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "df9dca66-984d-460b-acc6-3fe4ceb9ffc3", - "metadata": {}, - "outputs": [], - "source": [ - "# Set token for querying lambda\n", - "jwtToken = \"eyJhb\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ae218c5-8174-41bc-be5d-9487d68260c5", - "metadata": {}, - "outputs": [], - "source": [ - "# GET API\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"GET\",\n", - " \"resource\": \"/api\"\n", - "}, {})" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "67c10d73-8a23-41d6-9f20-7e23423e1c5e", - "metadata": {}, - "outputs": [], - "source": [ - "# GET TOKEN => path to token source\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"GET\",\n", - " \"resource\": \"/token\"\n", - "}, {})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0f3f6c4-7f6f-492b-82b5-f55ed8706a23", - "metadata": {}, - "outputs": [], - "source": [ - "# GET TOPICS\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"GET\",\n", - " \"resource\": \"/topics\"\n", - "}, {})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "639aeacb-4c73-48f1-b612-f56027765c38", - "metadata": {}, - "outputs": [], - "source": [ - "# GET TOPIC SCHEMA\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"GET\",\n", - " \"resource\": \"/topics/{topic_name}\",\n", - " \"pathParameters\": {\"topic_name\": \"public.cps.za.dlchange\"}\n", - "}, {})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fce7b6c-1538-4cb2-987e-833e19cc8502", - "metadata": {}, - "outputs": [], - "source": [ - "# POST MESSAGE\n", - "import json\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"POST\",\n", - " \"resource\": \"/topics/{topic_name}\",\n", - " \"pathParameters\": {\"topic_name\": \"public.cps.za.dlchange\"},\n", - " \"headers\": {\"Authorization\": f\"Bearer {jwtToken}\"},\n", - " \"body\": json.dumps({\n", - " \"event_id\": \"JupyterEventId\",\n", - " \"tenant_id\": \"JupyterTenantId\",\n", - " \"source_app\": \"JupyterSrc\",\n", - " \"source_app_version\": \"v2025-05-20\",\n", - " \"environment\": \"JupyterEnv\",\n", - " \"timestamp_event\": 1729602770000,\n", - " \"catalog_id\": \"TestCatalog\",\n", - " \"operation\": \"delete\",\n", - " \"location\": \"UnitTest\",\n", - " \"format\": \"TestFormat\",\n", - " \"format_options\": {\"Foo\" : \"Bar\"}\n", - " })\n", - "}, {})" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "be25af3b-f164-4ecf-82c8-d0657290bab1", - "metadata": {}, - "outputs": [], - "source": [ - "# CYCLE LAMBDA ENVIRONMENT\n", - "src.event_gate_lambda.lambda_handler({\n", - " \"httpMethod\": \"POST\",\n", - " \"resource\": \"/terminate\"\n", - "}, {})" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/scripts/prepare.deplyoment.sh b/scripts/prepare.deplyoment.sh deleted file mode 100644 index 4d65ba7..0000000 --- a/scripts/prepare.deplyoment.sh +++ /dev/null @@ -1,7 +0,0 @@ -cd .. -pip3 install -r src/requirements.txt -t dependencies/ --platform manylinux2014_x86_64 --python-version 3.12 --only-binary=:all: -zip -r lambda_function.zip conf -cd src -zip -r ../lambda_function.zip . -cd ../dependencies -zip -r ../lambda_function.zip . 
diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index 33d4203..a18d0d9 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -23,11 +23,12 @@ import boto3 import jwt -import urllib3 +from botocore.exceptions import BotoCoreError, NoCredentialsError from jsonschema import validate from jsonschema.exceptions import ValidationError from src.handlers.handler_token import HandlerToken +from src.utils.constants import SSL_CA_BUNDLE_KEY from src.writers import writer_eventbridge, writer_kafka, writer_postgres from src.utils.conf_path import CONF_DIR, INVALID_CONF_ENV @@ -35,7 +36,6 @@ _CONF_DIR = CONF_DIR _INVALID_CONF_ENV = INVALID_CONF_ENV -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) logger = logging.getLogger(__name__) log_level = os.environ.get("LOG_LEVEL", "INFO") @@ -64,8 +64,14 @@ config = json.load(file) logger.debug("Loaded main CONFIG") -aws_s3 = boto3.Session().resource("s3", verify=False) # nosec Boto verify disabled intentionally -logger.debug("Initialized AWS S3 Client") +# Initialize S3 client with SSL verification +try: + ssl_verify = config.get(SSL_CA_BUNDLE_KEY, True) + aws_s3 = boto3.Session().resource("s3", verify=ssl_verify) + logger.debug("Initialized AWS S3 Client") +except (BotoCoreError, NoCredentialsError) as exc: + logger.exception("Failed to initialize AWS S3 client") + raise RuntimeError("AWS S3 client initialization failed") from exc if config["access_config"].startswith("s3://"): name_parts = config["access_config"].split("/") diff --git a/src/handlers/handler_token.py b/src/handlers/handler_token.py index 44579f1..1762728 100644 --- a/src/handlers/handler_token.py +++ b/src/handlers/handler_token.py @@ -30,7 +30,12 @@ from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey -from src.utils.constants import TOKEN_PROVIDER_URL_KEY, TOKEN_PUBLIC_KEYS_URL_KEY, TOKEN_PUBLIC_KEY_URL_KEY +from src.utils.constants import 
( + TOKEN_PROVIDER_URL_KEY, + TOKEN_PUBLIC_KEYS_URL_KEY, + TOKEN_PUBLIC_KEY_URL_KEY, + SSL_CA_BUNDLE_KEY, +) logger = logging.getLogger(__name__) log_level = os.environ.get("LOG_LEVEL", "INFO") @@ -49,6 +54,7 @@ def __init__(self, config): self.public_keys_url: str = config.get(TOKEN_PUBLIC_KEYS_URL_KEY) or config.get(TOKEN_PUBLIC_KEY_URL_KEY) self.public_keys: list[RSAPublicKey] = [] self._last_loaded_at: datetime | None = None + self.ssl_ca_bundle: str | bool = config.get(SSL_CA_BUNDLE_KEY, True) def _refresh_keys_if_needed(self) -> None: """ @@ -79,7 +85,7 @@ def load_public_keys(self) -> "HandlerToken": logger.debug("Loading token public keys from %s", self.public_keys_url) try: - response_json = requests.get(self.public_keys_url, verify=False, timeout=5).json() + response_json = requests.get(self.public_keys_url, verify=self.ssl_ca_bundle, timeout=5).json() raw_keys: list[str] = [] if isinstance(response_json, dict): diff --git a/src/utils/constants.py b/src/utils/constants.py index 0d1eddb..ebbf762 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -18,7 +18,8 @@ This module contains all constants and enums used across the project. 
""" -# Token related configuration keys +# Configuration keys TOKEN_PROVIDER_URL_KEY = "token_provider_url" TOKEN_PUBLIC_KEY_URL_KEY = "token_public_key_url" TOKEN_PUBLIC_KEYS_URL_KEY = "token_public_keys_url" +SSL_CA_BUNDLE_KEY = "ssl_ca_bundle" diff --git a/tests/handlers/test_handler_token.py b/tests/handlers/test_handler_token.py index c191c4e..2761f95 100644 --- a/tests/handlers/test_handler_token.py +++ b/tests/handlers/test_handler_token.py @@ -142,3 +142,17 @@ def test_decode_jwt_triggers_refresh_check(token_handler): with patch("jwt.decode", return_value={"sub": "TestUser"}): token_handler.decode_jwt("dummy-token") mock_refresh.assert_called_once() + + +def test_handler_token_default_ssl_ca_bundle(): + """HandlerToken should default to True for ssl_ca_bundle when not specified.""" + config = {"token_public_keys_url": "https://example.com/keys"} + handler = HandlerToken(config) + assert handler.ssl_ca_bundle is True + + +def test_handler_token_custom_ssl_ca_bundle_path(): + """HandlerToken should accept custom CA bundle path.""" + config = {"token_public_keys_url": "https://example.com/keys", "ssl_ca_bundle": "/path/to/custom/ca-bundle.pem"} + handler = HandlerToken(config) + assert handler.ssl_ca_bundle == "/path/to/custom/ca-bundle.pem" diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index e6683b6..6f87a2a 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -15,7 +15,7 @@ # import json -from unittest.mock import patch +from unittest.mock import patch, MagicMock # --- GET flows --- @@ -230,3 +230,31 @@ def test_post_invalid_json_body(event_gate_module, make_event): assert resp["statusCode"] == 500 body = json.loads(resp["body"]) assert any(e["type"] == "internal" for e in body["errors"]) # internal error path + + +def test_boto3_s3_client_default_ssl_verification(): + """Test that boto3 S3 client uses default SSL verification when ssl_ca_bundle not specified.""" + config = {} + + with 
patch("boto3.Session") as mock_session: + mock_session_instance = MagicMock() + mock_session.return_value = mock_session_instance + + ssl_verify = config.get("ssl_ca_bundle", True) + mock_session_instance.resource("s3", verify=ssl_verify) + + mock_session_instance.resource.assert_called_once_with("s3", verify=True) + + +def test_boto3_s3_client_custom_ca_bundle(): + """Test that boto3 S3 client uses custom CA bundle when ssl_ca_bundle is specified.""" + config = {"ssl_ca_bundle": "/path/to/custom-ca-bundle.pem"} + + with patch("boto3.Session") as mock_session: + mock_session_instance = MagicMock() + mock_session.return_value = mock_session_instance + + ssl_verify = config.get("ssl_ca_bundle", True) + mock_session_instance.resource("s3", verify=ssl_verify) + + mock_session_instance.resource.assert_called_once_with("s3", verify="/path/to/custom-ca-bundle.pem")