diff --git a/.github/workflows/paradedb.yml b/.github/workflows/paradedb.yml new file mode 100644 index 0000000..51c53da --- /dev/null +++ b/.github/workflows/paradedb.yml @@ -0,0 +1,53 @@ +name: LocalStack ParadeDB Extension Tests + +on: + push: + paths: + - paradedb/** + branches: + - main + pull_request: + paths: + - .github/workflows/paradedb.yml + - paradedb/** + workflow_dispatch: + +env: + LOCALSTACK_DISABLE_EVENTS: "1" + LOCALSTACK_AUTH_TOKEN: ${{ secrets.LOCALSTACK_AUTH_TOKEN }} + +jobs: + integration-tests: + name: Run Integration Tests + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup LocalStack and extension + run: | + cd paradedb + + docker pull localstack/localstack-pro & + docker pull paradedb/paradedb & + pip install localstack + + make install + make lint + make dist + localstack extensions -v install file://$(ls ./dist/localstack_extension_paradedb-*.tar.gz) + + DEBUG=1 localstack start -d + localstack wait + + - name: Run integration tests + run: | + cd paradedb + make test + + - name: Print logs + if: always() + run: | + localstack logs + localstack stop diff --git a/README.md b/README.md index 6cf4345..123d77d 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ You can install the respective extension by calling `localstack extensions insta | [Stripe](https://github.com/localstack/localstack-extensions/tree/main/stripe) | localstack-extension-stripe | 0.2.0 | Stable | | [Terraform Init](https://github.com/localstack/localstack-extensions/tree/main/terraform-init) | localstack-extension-terraform-init | 0.2.0 | Experimental | | [TypeDB](https://github.com/localstack/localstack-extensions/tree/main/typedb) | localstack-extension-typedb | 0.1.3 | Experimental | +| [ParadeDB](https://github.com/localstack/localstack-extensions/tree/main/paradedb) | localstack-extension-paradedb | 0.1.0 | Experimental | ## Developing Extensions diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..edf2f11 --- /dev/null +++ b/flake.nix @@ -0,0 +1,22 @@ +{ + description = "localstack-extensions"; + + inputs = { + nixpkgs.url = "nixpkgs/nixpkgs-unstable"; + }; + + outputs = { self, nixpkgs }@inputs: + ( + let + forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.platforms.all; + in + { + devShell = forAllSystems (system: + let pkgs = import nixpkgs { inherit system; }; in + pkgs.mkShell { + buildInputs = with pkgs; [ uv python311 python311Packages.pip ty ]; + } + ); + } + ); +} diff --git a/paradedb/.gitignore b/paradedb/.gitignore new file mode 100644 index 0000000..1808cca --- /dev/null +++ b/paradedb/.gitignore @@ -0,0 +1,5 @@ +.venv +dist +build +**/*.egg-info +.eggs diff --git a/paradedb/Makefile b/paradedb/Makefile new file mode 100644 index 0000000..dea9383 --- /dev/null +++ b/paradedb/Makefile @@ -0,0 +1,48 @@ +VENV_BIN = python3 -m venv +VENV_DIR ?= .venv +VENV_ACTIVATE = $(VENV_DIR)/bin/activate +VENV_RUN = . $(VENV_ACTIVATE) +TEST_PATH ?= tests + +usage: ## Shows usage for this Makefile + @cat Makefile | grep -E '^[a-zA-Z_-]+:.*?## .*$$' | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' + +venv: $(VENV_ACTIVATE) + +$(VENV_ACTIVATE): pyproject.toml + test -d .venv || $(VENV_BIN) .venv + $(VENV_RUN); pip install --upgrade pip setuptools plux + $(VENV_RUN); pip install -e .[dev] + touch $(VENV_DIR)/bin/activate + +clean: + rm -rf .venv/ + rm -rf build/ + rm -rf .eggs/ + rm -rf *.egg-info/ + +install: venv ## Install dependencies + $(VENV_RUN); python -m plux entrypoints + +dist: venv ## Create distribution + $(VENV_RUN); python -m build + +publish: clean-dist venv dist ## Publish extension to pypi + $(VENV_RUN); pip install --upgrade twine; twine upload dist/* + +entrypoints: venv ## Generate plugin entrypoints for Python package + $(VENV_RUN); python -m plux entrypoints + +format: ## Run ruff to format the codebase + $(VENV_RUN); python -m ruff format .; make lint + +lint: ## Run ruff to lint the codebase + $(VENV_RUN); python -m ruff check --output-format=full . + +test: ## Run integration tests (requires LocalStack running with the Extension installed) + $(VENV_RUN); pytest $(PYTEST_ARGS) $(TEST_PATH) + +clean-dist: clean + rm -rf dist/ + +.PHONY: clean clean-dist dist install publish usage venv format test diff --git a/paradedb/README.md b/paradedb/README.md new file mode 100644 index 0000000..46163b6 --- /dev/null +++ b/paradedb/README.md @@ -0,0 +1,95 @@ +ParadeDB on LocalStack +====================== + +This repo contains a [LocalStack Extension](https://github.com/localstack/localstack-extensions) that facilitates developing [ParadeDB](https://www.paradedb.com)-based applications locally. + +ParadeDB is an Elasticsearch alternative built on Postgres. It provides full-text search with BM25 scoring, hybrid search combining semantic and keyword search, and real-time analytics capabilities. + +After installing the extension, a ParadeDB server instance will become available and can be accessed using standard PostgreSQL clients. + +## Connection Details + +Once the extension is running, you can connect to ParadeDB using any PostgreSQL client with the following default credentials: + +- **Host**: `localhost` (or the Docker host if running in a container) +- **Port**: `5432` (mapped from the container) +- **Database**: `mydatabase` +- **Username**: `myuser` +- **Password**: `mypassword` + +Example connection using `psql`: +```bash +psql -h localhost -p 5432 -U myuser -d mydatabase +``` + +Example connection using Python: +```python +import psycopg2 + +conn = psycopg2.connect( + host="localhost", + port=5432, + database="mydatabase", + user="myuser", + password="mypassword" +) +``` + +## ParadeDB Features + +ParadeDB includes the **pg_search** extension, for both search and +analytics workloads. + +Example of BM25 scoring, from the excellent [quickstart guide](https://docs.paradedb.com/documentation/getting-started/quickstart): + +```sql +CALL paradedb.create_bm25_test_table( + schema_name => 'public', + table_name => 'mock_items' +); + +CREATE INDEX search_idx ON mock_items +USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range) +WITH (key_field='id'); + +SELECT description, pdb.score(id) +FROM mock_items +WHERE description ||| 'running shoes' AND rating > 2 +ORDER BY score DESC +LIMIT 5; +``` + +## Configuration + +The following environment variables can be passed to the LocalStack container to configure the extension: + +* `PARADEDB_POSTGRES_USER`: PostgreSQL username (default: `myuser`) +* `PARADEDB_POSTGRES_PASSWORD`: PostgreSQL password (default: `mypassword`) +* `PARADEDB_POSTGRES_DB`: Default database name (default: `mydatabase`) + +## Prerequisites + +* Docker +* LocalStack Pro (free trial available) +* `localstack` CLI +* `make` + +## Install from GitHub repository + +This extension can be installed directly from this Github repo via: + +```bash +localstack extensions install "git+https://github.com/localstack/localstack-extensions.git#egg=localstack-extension-paradedb&subdirectory=paradedb" +``` + +## Install local development version + +Please refer to the docs [here](https://github.com/localstack/localstack-extensions?tab=readme-ov-file#start-localstack-with-the-extension) for instructions on how to start the extension in developer mode. + +## Change Log + +* `0.1.0`: Initial version of the extension + +## License + +The code in this repo is available under the Apache 2.0 license. diff --git a/paradedb/localstack_paradedb/__init__.py b/paradedb/localstack_paradedb/__init__.py new file mode 100644 index 0000000..acdbec4 --- /dev/null +++ b/paradedb/localstack_paradedb/__init__.py @@ -0,0 +1 @@ +name = "localstack_paradedb" diff --git a/paradedb/localstack_paradedb/extension.py b/paradedb/localstack_paradedb/extension.py new file mode 100644 index 0000000..845dbbc --- /dev/null +++ b/paradedb/localstack_paradedb/extension.py @@ -0,0 +1,70 @@ +import os +import logging + +from localstack_paradedb.utils.docker import DatabaseDockerContainerExtension + +LOG = logging.getLogger(__name__) + +# Environment variables for configuration +ENV_POSTGRES_USER = "PARADEDB_POSTGRES_USER" +ENV_POSTGRES_PASSWORD = "PARADEDB_POSTGRES_PASSWORD" +ENV_POSTGRES_DB = "PARADEDB_POSTGRES_DB" +ENV_POSTGRES_PORT = "PARADEDB_POSTGRES_PORT" + +# Default values +DEFAULT_POSTGRES_USER = "myuser" +DEFAULT_POSTGRES_PASSWORD = "mypassword" +DEFAULT_POSTGRES_DB = "mydatabase" +DEFAULT_POSTGRES_PORT = 5432 + + +class ParadeDbExtension(DatabaseDockerContainerExtension): + name = "paradedb" + + # Name of the Docker image to spin up + DOCKER_IMAGE = "paradedb/paradedb" + + def __init__(self): + # Get configuration from environment variables + postgres_user = os.environ.get(ENV_POSTGRES_USER, DEFAULT_POSTGRES_USER) + postgres_password = os.environ.get( + ENV_POSTGRES_PASSWORD, DEFAULT_POSTGRES_PASSWORD + ) + postgres_db = os.environ.get(ENV_POSTGRES_DB, DEFAULT_POSTGRES_DB) + postgres_port = int(os.environ.get(ENV_POSTGRES_PORT, DEFAULT_POSTGRES_PORT)) + + # Environment variables to pass to the container + env_vars = { + "POSTGRES_USER": postgres_user, + "POSTGRES_PASSWORD": postgres_password, + "POSTGRES_DB": postgres_db, + } + + super().__init__( + image_name=self.DOCKER_IMAGE, + container_ports=[postgres_port], + env_vars=env_vars, + ) + + # Store configuration for connection info + self.postgres_user = postgres_user + self.postgres_password = postgres_password + self.postgres_db = postgres_db + self.postgres_port = postgres_port + + def get_connection_info(self) -> dict: + """Return connection information for ParadeDB.""" + info = super().get_connection_info() + info.update( + { + "database": self.postgres_db, + "user": self.postgres_user, + "password": self.postgres_password, + "port": self.postgres_port, + "connection_string": ( + f"postgresql://{self.postgres_user}:{self.postgres_password}" + f"@{self.container_host}:{self.postgres_port}/{self.postgres_db}" + ), + } + ) + return info diff --git a/paradedb/localstack_paradedb/utils/__init__.py b/paradedb/localstack_paradedb/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/paradedb/localstack_paradedb/utils/docker.py b/paradedb/localstack_paradedb/utils/docker.py new file mode 100644 index 0000000..6643e6d --- /dev/null +++ b/paradedb/localstack_paradedb/utils/docker.py @@ -0,0 +1,144 @@ +import re +import socket +import logging +from functools import cache +from typing import Callable + +from localstack import config +from localstack.utils.docker_utils import DOCKER_CLIENT +from localstack.extensions.api import Extension +from localstack.utils.container_utils.container_client import PortMappings +from localstack.utils.net import get_addressable_container_host +from localstack.utils.sync import retry + +LOG = logging.getLogger(__name__) +logging.getLogger("localstack_paradedb").setLevel( + logging.DEBUG if config.DEBUG else logging.INFO +) +logging.basicConfig() + + +class DatabaseDockerContainerExtension(Extension): + """ + Utility class to create a LocalStack Extension which runs a Docker container + for a database service that uses a native protocol (e.g., PostgreSQL). + + Unlike HTTP-based services, database connections are made directly to the + exposed container port rather than through the LocalStack gateway. + """ + + name: str + """Name of this extension, which must be overridden in a subclass.""" + image_name: str + """Docker image name""" + container_ports: list[int] + """List of network ports of the Docker container spun up by the extension""" + command: list[str] | None + """Optional command (and flags) to execute in the container.""" + env_vars: dict[str, str] | None + """Optional environment variables to pass to the container.""" + health_check_port: int | None + """Port to use for health check (defaults to first port in container_ports).""" + health_check_fn: Callable[[], bool] | None + """Optional custom health check function.""" + + def __init__( + self, + image_name: str, + container_ports: list[int], + command: list[str] | None = None, + env_vars: dict[str, str] | None = None, + health_check_port: int | None = None, + health_check_fn: Callable[[], bool] | None = None, + ): + self.image_name = image_name + if not container_ports: + raise ValueError("container_ports is required") + self.container_ports = container_ports + self.container_name = re.sub(r"\W", "-", f"ls-ext-{self.name}") + self.command = command + self.env_vars = env_vars + self.health_check_port = health_check_port or container_ports[0] + self.health_check_fn = health_check_fn + self.container_host = get_addressable_container_host() + + def on_extension_load(self): + LOG.info("Loading ParadeDB extension") + + def on_platform_start(self): + LOG.info("Starting ParadeDB extension - launching container") + self.start_container() + + def on_platform_shutdown(self): + self._remove_container() + + @cache + def start_container(self) -> None: + LOG.debug("Starting extension container %s", self.container_name) + + port_mapping = PortMappings() + for port in self.container_ports: + port_mapping.add(port) + + kwargs = {} + if self.command: + kwargs["command"] = self.command + if self.env_vars: + kwargs["env_vars"] = self.env_vars + + try: + DOCKER_CLIENT.run_container( + self.image_name, + detach=True, + remove=True, + name=self.container_name, + ports=port_mapping, + **kwargs, + ) + except Exception as e: + LOG.debug("Failed to start container %s: %s", self.container_name, e) + raise + + def _check_health(): + if self.health_check_fn: + assert self.health_check_fn() + else: + # Default: TCP socket check + self._check_tcp_port(self.container_host, self.health_check_port) + + try: + retry(_check_health, retries=60, sleep=1) + except Exception as e: + LOG.info("Failed to connect to container %s: %s", self.container_name, e) + self._remove_container() + raise + + LOG.info( + "Successfully started extension container %s on %s:%s", + self.container_name, + self.container_host, + self.health_check_port, + ) + + def _check_tcp_port(self, host: str, port: int, timeout: float = 2.0) -> None: + """Check if a TCP port is accepting connections.""" + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(timeout) + try: + sock.connect((host, port)) + sock.close() + except (socket.timeout, socket.error) as e: + raise AssertionError(f"Port {port} not ready: {e}") + + def _remove_container(self): + LOG.debug("Stopping extension container %s", self.container_name) + DOCKER_CLIENT.remove_container( + self.container_name, force=True, check_existence=False + ) + + def get_connection_info(self) -> dict: + """Return connection information for the database.""" + return { + "host": self.container_host, + "ports": {port: port for port in self.container_ports}, + } diff --git a/paradedb/pyproject.toml b/paradedb/pyproject.toml new file mode 100644 index 0000000..291d857 --- /dev/null +++ b/paradedb/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools", "wheel", "plux>=1.3.1"] +build-backend = "setuptools.build_meta" + +[project] +name = "localstack-extension-paradedb" +version = "0.1.0" +description = "LocalStack Extension: ParadeDB on LocalStack" +readme = {file = "README.md", content-type = "text/markdown; charset=UTF-8"} +requires-python = ">=3.10" +authors = [ + { name = "LocalStack team"} +] +keywords = ["LocalStack", "ParadeDB", "PostgreSQL", "Search", "Analytics"] +classifiers = [] +dependencies = [] + +[project.urls] +Homepage = "https://github.com/localstack/localstack-extensions" + +[project.optional-dependencies] +dev = [ + "boto3", + "build", + "jsonpatch", + "localstack", + "psycopg2-binary", + "pytest", + "rolo", + "ruff", +] + +[project.entry-points."localstack.extensions"] +localstack_paradedb = "localstack_paradedb.extension:ParadeDbExtension" diff --git a/paradedb/tests/__init__.py b/paradedb/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/paradedb/tests/test_extension.py b/paradedb/tests/test_extension.py new file mode 100644 index 0000000..bd1277e --- /dev/null +++ b/paradedb/tests/test_extension.py @@ -0,0 +1,105 @@ +import psycopg2 +from localstack.utils.strings import short_uid + + +# Connection details for ParadeDB +HOST = "localhost" +PORT = 5432 +USER = "myuser" +PASSWORD = "mypassword" +DATABASE = "mydatabase" + + +def get_connection(): + """Create a connection to ParadeDB.""" + return psycopg2.connect( + host=HOST, + port=PORT, + user=USER, + password=PASSWORD, + database=DATABASE, + ) + + +def test_connect_to_paradedb(): + """Test basic connection to ParadeDB.""" + conn = get_connection() + cursor = conn.cursor() + + # Check PostgreSQL version + cursor.execute("SELECT version();") + version = cursor.fetchone()[0] + assert "PostgreSQL" in version + + cursor.close() + conn.close() + + +def test_paradedb_quickstart(): + """Test some of ParadeDB's quickstart examples.""" + conn = get_connection() + cursor = conn.cursor() + + table_name = f"mock_items_{short_uid()}" + index_name = f"{table_name}_idx" + + try: + # Load sample data + cursor.execute(f""" + CALL paradedb.create_bm25_test_table( + schema_name => 'public', + table_name => '{table_name}' + ); + """) + + # Create search index + cursor.execute(f""" + CREATE INDEX search_idx ON {table_name} + USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range) + WITH (key_field='id'); + """) + + cursor.execute(f""" + SELECT description, rating, category + FROM {table_name} + LIMIT 3; + """) + results = cursor.fetchall() + assert results == [ + ("Ergonomic metal keyboard", 4, "Electronics"), + ("Plastic Keyboard", 4, "Electronics"), + ("Sleek running shoes", 5, "Footwear"), + ] + + # Match conjunction + cursor.execute(f""" + SELECT description, rating, category + FROM {table_name} + WHERE description &&& 'running shoes' AND rating > 2 + ORDER BY rating + LIMIT 5; + """) + results = cursor.fetchall() + assert results == [("Sleek running shoes", 5, "Footwear")] + + # BM25 scoring + cursor.execute(f""" + SELECT description, pdb.score(id) + FROM {table_name} + WHERE description ||| 'running shoes' AND rating > 2 + ORDER BY score DESC + LIMIT 5; + """) + results = cursor.fetchall() + assert results == [ + ("Sleek running shoes", 6.817111), + ("Generic shoes", 3.8772602), + ("White jogging shoes", 3.4849067), + ] + finally: + # Cleanup - drop index first, then table + cursor.execute(f"DROP INDEX IF EXISTS {index_name};") + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close()