From 46d2dc34534e53614ba401950637c5ca666fa736 Mon Sep 17 00:00:00 2001 From: Waldemar Hummer Date: Tue, 27 Jan 2026 10:18:57 +0100 Subject: [PATCH 01/10] add initial version of ParadeDB extension LocalStack extension for ParadeDB (PostgreSQL-based search and analytics). Features: - Runs paradedb/paradedb Docker container - Exposes PostgreSQL port 5432 for direct connections - Configurable via PARADEDB_POSTGRES_USER/PASSWORD/DB env vars - Integration tests for basic SQL and pg_search BM25 functionality - CI workflow for automated testing Co-Authored-By: Claude Opus 4.5 --- .github/workflows/paradedb.yml | 53 ++++ paradedb/.gitignore | 5 + paradedb/Makefile | 48 ++++ paradedb/README.md | 100 +++++++ paradedb/localstack_paradedb/__init__.py | 1 + paradedb/localstack_paradedb/extension.py | 64 +++++ .../localstack_paradedb/utils/__init__.py | 0 paradedb/localstack_paradedb/utils/docker.py | 144 ++++++++++ paradedb/pyproject.toml | 34 +++ paradedb/tests/__init__.py | 0 paradedb/tests/test_extension.py | 269 ++++++++++++++++++ 11 files changed, 718 insertions(+) create mode 100644 .github/workflows/paradedb.yml create mode 100644 paradedb/.gitignore create mode 100644 paradedb/Makefile create mode 100644 paradedb/README.md create mode 100644 paradedb/localstack_paradedb/__init__.py create mode 100644 paradedb/localstack_paradedb/extension.py create mode 100644 paradedb/localstack_paradedb/utils/__init__.py create mode 100644 paradedb/localstack_paradedb/utils/docker.py create mode 100644 paradedb/pyproject.toml create mode 100644 paradedb/tests/__init__.py create mode 100644 paradedb/tests/test_extension.py diff --git a/.github/workflows/paradedb.yml b/.github/workflows/paradedb.yml new file mode 100644 index 0000000..51c53da --- /dev/null +++ b/.github/workflows/paradedb.yml @@ -0,0 +1,53 @@ +name: LocalStack ParadeDB Extension Tests + +on: + push: + paths: + - paradedb/** + branches: + - main + pull_request: + paths: + - .github/workflows/paradedb.yml + - paradedb/** + 
workflow_dispatch: + +env: + LOCALSTACK_DISABLE_EVENTS: "1" + LOCALSTACK_AUTH_TOKEN: ${{ secrets.LOCALSTACK_AUTH_TOKEN }} + +jobs: + integration-tests: + name: Run Integration Tests + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup LocalStack and extension + run: | + cd paradedb + + docker pull localstack/localstack-pro & + docker pull paradedb/paradedb & + pip install localstack + + make install + make lint + make dist + localstack extensions -v install file://$(ls ./dist/localstack_extension_paradedb-*.tar.gz) + + DEBUG=1 localstack start -d + localstack wait + + - name: Run integration tests + run: | + cd paradedb + make test + + - name: Print logs + if: always() + run: | + localstack logs + localstack stop diff --git a/paradedb/.gitignore b/paradedb/.gitignore new file mode 100644 index 0000000..1808cca --- /dev/null +++ b/paradedb/.gitignore @@ -0,0 +1,5 @@ +.venv +dist +build +**/*.egg-info +.eggs diff --git a/paradedb/Makefile b/paradedb/Makefile new file mode 100644 index 0000000..dea9383 --- /dev/null +++ b/paradedb/Makefile @@ -0,0 +1,48 @@ +VENV_BIN = python3 -m venv +VENV_DIR ?= .venv +VENV_ACTIVATE = $(VENV_DIR)/bin/activate +VENV_RUN = . 
$(VENV_ACTIVATE) +TEST_PATH ?= tests + +usage: ## Shows usage for this Makefile + @cat Makefile | grep -E '^[a-zA-Z_-]+:.*?## .*$$' | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' + +venv: $(VENV_ACTIVATE) + +$(VENV_ACTIVATE): pyproject.toml + test -d .venv || $(VENV_BIN) .venv + $(VENV_RUN); pip install --upgrade pip setuptools plux + $(VENV_RUN); pip install -e .[dev] + touch $(VENV_DIR)/bin/activate + +clean: + rm -rf .venv/ + rm -rf build/ + rm -rf .eggs/ + rm -rf *.egg-info/ + +install: venv ## Install dependencies + $(VENV_RUN); python -m plux entrypoints + +dist: venv ## Create distribution + $(VENV_RUN); python -m build + +publish: clean-dist venv dist ## Publish extension to pypi + $(VENV_RUN); pip install --upgrade twine; twine upload dist/* + +entrypoints: venv ## Generate plugin entrypoints for Python package + $(VENV_RUN); python -m plux entrypoints + +format: ## Run ruff to format the codebase + $(VENV_RUN); python -m ruff format .; make lint + +lint: ## Run ruff to lint the codebase + $(VENV_RUN); python -m ruff check --output-format=full . + +test: ## Run integration tests (requires LocalStack running with the Extension installed) + $(VENV_RUN); pytest $(PYTEST_ARGS) $(TEST_PATH) + +clean-dist: clean + rm -rf dist/ + +.PHONY: clean clean-dist dist install publish usage venv format test diff --git a/paradedb/README.md b/paradedb/README.md new file mode 100644 index 0000000..21aa416 --- /dev/null +++ b/paradedb/README.md @@ -0,0 +1,100 @@ +ParadeDB on LocalStack +====================== + +This repo contains a [LocalStack Extension](https://github.com/localstack/localstack-extensions) that facilitates developing [ParadeDB](https://www.paradedb.com)-based applications locally. + +ParadeDB is an Elasticsearch alternative built on Postgres. It provides full-text search with BM25 scoring, hybrid search combining semantic and keyword search, and real-time analytics capabilities. 
+ +After installing the extension, a ParadeDB server instance will become available and can be accessed using standard PostgreSQL clients. + +## Connection Details + +Once the extension is running, you can connect to ParadeDB using any PostgreSQL client with the following default credentials: + +- **Host**: `localhost` (or the Docker host if running in a container) +- **Port**: `5432` (mapped from the container) +- **Database**: `postgres` +- **Username**: `postgres` +- **Password**: `postgres` + +Example connection using `psql`: +```bash +psql -h localhost -p 5432 -U postgres -d postgres +``` + +Example connection using Python: +```python +import psycopg2 + +conn = psycopg2.connect( + host="localhost", + port=5432, + database="postgres", + user="postgres", + password="postgres" +) +``` + +## ParadeDB Features + +ParadeDB includes several powerful extensions: + +- **pg_search**: Full-text search with BM25 ranking +- **pg_analytics**: DuckDB-powered analytics for OLAP workloads +- **pg_lakehouse**: Query data lakes (S3, Delta Lake, Iceberg) directly + +Example using pg_search: +```sql +-- Create a table with search index +CREATE TABLE products ( + id SERIAL PRIMARY KEY, + name TEXT, + description TEXT +); + +-- Create a BM25 search index +CALL paradedb.create_bm25( + index_name => 'products_idx', + table_name => 'products', + key_field => 'id', + text_fields => paradedb.field('name') || paradedb.field('description') +); + +-- Search with BM25 scoring +SELECT * FROM products.search('description:electronics'); +``` + +## Configuration + +The following environment variables can be passed to the LocalStack container to configure the extension: + +* `PARADEDB_POSTGRES_USER`: PostgreSQL username (default: `postgres`) +* `PARADEDB_POSTGRES_PASSWORD`: PostgreSQL password (default: `postgres`) +* `PARADEDB_POSTGRES_DB`: Default database name (default: `postgres`) + +## Prerequisites + +* Docker +* LocalStack Pro (free trial available) +* `localstack` CLI +* `make` + +## 
Install from GitHub repository + +This extension can be installed directly from this GitHub repo via: + +```bash +localstack extensions install "git+https://github.com/localstack/localstack-extensions.git#egg=localstack-extension-paradedb&subdirectory=paradedb" +``` + +## Install local development version + +Please refer to the docs [here](https://github.com/localstack/localstack-extensions?tab=readme-ov-file#start-localstack-with-the-extension) for instructions on how to start the extension in developer mode. + +## Change Log + +* `0.1.0`: Initial version of the extension + +## License + +The code in this repo is available under the Apache 2.0 license. diff --git a/paradedb/localstack_paradedb/__init__.py b/paradedb/localstack_paradedb/__init__.py new file mode 100644 index 0000000..acdbec4 --- /dev/null +++ b/paradedb/localstack_paradedb/__init__.py @@ -0,0 +1 @@ +name = "localstack_paradedb" diff --git a/paradedb/localstack_paradedb/extension.py b/paradedb/localstack_paradedb/extension.py new file mode 100644 index 0000000..d1d60b0 --- /dev/null +++ b/paradedb/localstack_paradedb/extension.py @@ -0,0 +1,64 @@ +import os +import logging + +from localstack_paradedb.utils.docker import DatabaseDockerContainerExtension + +LOG = logging.getLogger(__name__) + +# Environment variables for configuration +ENV_POSTGRES_USER = "PARADEDB_POSTGRES_USER" +ENV_POSTGRES_PASSWORD = "PARADEDB_POSTGRES_PASSWORD" +ENV_POSTGRES_DB = "PARADEDB_POSTGRES_DB" + +# Default values +DEFAULT_POSTGRES_USER = "postgres" +DEFAULT_POSTGRES_PASSWORD = "postgres" +DEFAULT_POSTGRES_DB = "postgres" + + +class ParadeDbExtension(DatabaseDockerContainerExtension): + name = "paradedb" + + # Name of the Docker image to spin up + DOCKER_IMAGE = "paradedb/paradedb" + # Default port for PostgreSQL + POSTGRES_PORT = 5432 + + def __init__(self): + # Get configuration from environment variables + postgres_user = os.environ.get(ENV_POSTGRES_USER, DEFAULT_POSTGRES_USER) + postgres_password =
os.environ.get(ENV_POSTGRES_PASSWORD, DEFAULT_POSTGRES_PASSWORD) + postgres_db = os.environ.get(ENV_POSTGRES_DB, DEFAULT_POSTGRES_DB) + + # Environment variables to pass to the container + env_vars = { + "POSTGRES_USER": postgres_user, + "POSTGRES_PASSWORD": postgres_password, + "POSTGRES_DB": postgres_db, + } + + super().__init__( + image_name=self.DOCKER_IMAGE, + container_ports=[self.POSTGRES_PORT], + env_vars=env_vars, + ) + + # Store configuration for connection info + self.postgres_user = postgres_user + self.postgres_password = postgres_password + self.postgres_db = postgres_db + + def get_connection_info(self) -> dict: + """Return connection information for ParadeDB.""" + info = super().get_connection_info() + info.update({ + "database": self.postgres_db, + "user": self.postgres_user, + "password": self.postgres_password, + "port": self.POSTGRES_PORT, + "connection_string": ( + f"postgresql://{self.postgres_user}:{self.postgres_password}" + f"@{self.container_host}:{self.POSTGRES_PORT}/{self.postgres_db}" + ), + }) + return info diff --git a/paradedb/localstack_paradedb/utils/__init__.py b/paradedb/localstack_paradedb/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/paradedb/localstack_paradedb/utils/docker.py b/paradedb/localstack_paradedb/utils/docker.py new file mode 100644 index 0000000..6643e6d --- /dev/null +++ b/paradedb/localstack_paradedb/utils/docker.py @@ -0,0 +1,144 @@ +import re +import socket +import logging +from functools import cache +from typing import Callable + +from localstack import config +from localstack.utils.docker_utils import DOCKER_CLIENT +from localstack.extensions.api import Extension +from localstack.utils.container_utils.container_client import PortMappings +from localstack.utils.net import get_addressable_container_host +from localstack.utils.sync import retry + +LOG = logging.getLogger(__name__) +logging.getLogger("localstack_paradedb").setLevel( + logging.DEBUG if config.DEBUG else logging.INFO 
+) +logging.basicConfig() + + +class DatabaseDockerContainerExtension(Extension): + """ + Utility class to create a LocalStack Extension which runs a Docker container + for a database service that uses a native protocol (e.g., PostgreSQL). + + Unlike HTTP-based services, database connections are made directly to the + exposed container port rather than through the LocalStack gateway. + """ + + name: str + """Name of this extension, which must be overridden in a subclass.""" + image_name: str + """Docker image name""" + container_ports: list[int] + """List of network ports of the Docker container spun up by the extension""" + command: list[str] | None + """Optional command (and flags) to execute in the container.""" + env_vars: dict[str, str] | None + """Optional environment variables to pass to the container.""" + health_check_port: int | None + """Port to use for health check (defaults to first port in container_ports).""" + health_check_fn: Callable[[], bool] | None + """Optional custom health check function.""" + + def __init__( + self, + image_name: str, + container_ports: list[int], + command: list[str] | None = None, + env_vars: dict[str, str] | None = None, + health_check_port: int | None = None, + health_check_fn: Callable[[], bool] | None = None, + ): + self.image_name = image_name + if not container_ports: + raise ValueError("container_ports is required") + self.container_ports = container_ports + self.container_name = re.sub(r"\W", "-", f"ls-ext-{self.name}") + self.command = command + self.env_vars = env_vars + self.health_check_port = health_check_port or container_ports[0] + self.health_check_fn = health_check_fn + self.container_host = get_addressable_container_host() + + def on_extension_load(self): + LOG.info("Loading ParadeDB extension") + + def on_platform_start(self): + LOG.info("Starting ParadeDB extension - launching container") + self.start_container() + + def on_platform_shutdown(self): + self._remove_container() + + @cache + def 
start_container(self) -> None: + LOG.debug("Starting extension container %s", self.container_name) + + port_mapping = PortMappings() + for port in self.container_ports: + port_mapping.add(port) + + kwargs = {} + if self.command: + kwargs["command"] = self.command + if self.env_vars: + kwargs["env_vars"] = self.env_vars + + try: + DOCKER_CLIENT.run_container( + self.image_name, + detach=True, + remove=True, + name=self.container_name, + ports=port_mapping, + **kwargs, + ) + except Exception as e: + LOG.debug("Failed to start container %s: %s", self.container_name, e) + raise + + def _check_health(): + if self.health_check_fn: + assert self.health_check_fn() + else: + # Default: TCP socket check + self._check_tcp_port(self.container_host, self.health_check_port) + + try: + retry(_check_health, retries=60, sleep=1) + except Exception as e: + LOG.info("Failed to connect to container %s: %s", self.container_name, e) + self._remove_container() + raise + + LOG.info( + "Successfully started extension container %s on %s:%s", + self.container_name, + self.container_host, + self.health_check_port, + ) + + def _check_tcp_port(self, host: str, port: int, timeout: float = 2.0) -> None: + """Check if a TCP port is accepting connections.""" + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(timeout) + try: + sock.connect((host, port)) + sock.close() + except (socket.timeout, socket.error) as e: + raise AssertionError(f"Port {port} not ready: {e}") + + def _remove_container(self): + LOG.debug("Stopping extension container %s", self.container_name) + DOCKER_CLIENT.remove_container( + self.container_name, force=True, check_existence=False + ) + + def get_connection_info(self) -> dict: + """Return connection information for the database.""" + return { + "host": self.container_host, + "ports": {port: port for port in self.container_ports}, + } diff --git a/paradedb/pyproject.toml b/paradedb/pyproject.toml new file mode 100644 index 0000000..d21cbc4 --- 
/dev/null +++ b/paradedb/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools", "wheel", "plux>=1.3.1"] +build-backend = "setuptools.build_meta" + +[project] +name = "localstack-extension-paradedb" +version = "0.1.0" +description = "LocalStack Extension: ParadeDB on LocalStack" +readme = {file = "README.md", content-type = "text/markdown; charset=UTF-8"} +requires-python = ">=3.10" +authors = [ + { name = "LocalStack team"} +] +keywords = ["LocalStack", "ParadeDB", "PostgreSQL"] +classifiers = [] +dependencies = [] + +[project.urls] +Homepage = "https://github.com/localstack/localstack-extensions" + +[project.optional-dependencies] +dev = [ + "boto3", + "build", + "jsonpatch", + "localstack", + "psycopg2-binary", + "pytest", + "rolo", + "ruff", +] + +[project.entry-points."localstack.extensions"] +localstack_paradedb = "localstack_paradedb.extension:ParadeDbExtension" diff --git a/paradedb/tests/__init__.py b/paradedb/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/paradedb/tests/test_extension.py b/paradedb/tests/test_extension.py new file mode 100644 index 0000000..043d461 --- /dev/null +++ b/paradedb/tests/test_extension.py @@ -0,0 +1,269 @@ +import psycopg2 +from localstack.utils.strings import short_uid + + +# Connection details for ParadeDB +HOST = "localhost" +PORT = 5432 +USER = "postgres" +PASSWORD = "postgres" +DATABASE = "postgres" + + +def get_connection(): + """Create a connection to ParadeDB.""" + return psycopg2.connect( + host=HOST, + port=PORT, + user=USER, + password=PASSWORD, + database=DATABASE, + ) + + +def test_connect_to_paradedb(): + """Test basic connection to ParadeDB.""" + conn = get_connection() + cursor = conn.cursor() + + # Check PostgreSQL version + cursor.execute("SELECT version();") + version = cursor.fetchone()[0] + assert "PostgreSQL" in version + + cursor.close() + conn.close() + + +def test_create_table_and_insert(): + """Test creating a table and inserting data.""" + conn = 
get_connection() + cursor = conn.cursor() + + table_name = f"test_table_{short_uid()}" + + try: + # Create table + cursor.execute(f""" + CREATE TABLE {table_name} ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT + ); + """) + conn.commit() + + # Insert data + cursor.execute(f""" + INSERT INTO {table_name} (name, description) + VALUES ('Product A', 'A great product'), + ('Product B', 'Another product'), + ('Product C', 'Yet another product'); + """) + conn.commit() + + # Query data + cursor.execute(f"SELECT * FROM {table_name} ORDER BY id;") + results = cursor.fetchall() + + assert len(results) == 3 + assert results[0][1] == "Product A" + assert results[1][1] == "Product B" + assert results[2][1] == "Product C" + + finally: + # Cleanup + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close() + + +def test_paradedb_pg_search_extension(): + """Test ParadeDB's pg_search extension for full-text search.""" + conn = get_connection() + cursor = conn.cursor() + + table_name = f"products_{short_uid()}" + index_name = f"{table_name}_idx" + + try: + # Create table + cursor.execute(f""" + CREATE TABLE {table_name} ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT + ); + """) + conn.commit() + + # Insert sample data + cursor.execute(f""" + INSERT INTO {table_name} (name, description) VALUES + ('Laptop', 'High performance laptop with 16GB RAM and SSD storage'), + ('Smartphone', 'Latest smartphone with advanced camera features'), + ('Headphones', 'Wireless noise-canceling headphones for music lovers'), + ('Tablet', 'Portable tablet with retina display'), + ('Smartwatch', 'Fitness tracking smartwatch with heart rate monitor'); + """) + conn.commit() + + # Create BM25 search index using ParadeDB + cursor.execute(f""" + CALL paradedb.create_bm25( + index_name => '{index_name}', + table_name => '{table_name}', + key_field => 'id', + text_fields => paradedb.field('name') || paradedb.field('description') 
+ ); + """) + conn.commit() + + # Search for products containing 'wireless' + cursor.execute(f""" + SELECT id, name, description + FROM {index_name}.search('description:wireless'); + """) + results = cursor.fetchall() + + assert len(results) >= 1 + assert any("Headphones" in row[1] for row in results) + + # Search for products containing 'laptop' + cursor.execute(f""" + SELECT id, name, description + FROM {index_name}.search('name:laptop OR description:laptop'); + """) + results = cursor.fetchall() + + assert len(results) >= 1 + assert any("Laptop" in row[1] for row in results) + + finally: + # Cleanup + cursor.execute(f"CALL paradedb.drop_bm25('{index_name}');") + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close() + + +def test_paradedb_hybrid_search(): + """Test ParadeDB's hybrid search capabilities.""" + conn = get_connection() + cursor = conn.cursor() + + table_name = f"docs_{short_uid()}" + index_name = f"{table_name}_idx" + + try: + # Create table with text content + cursor.execute(f""" + CREATE TABLE {table_name} ( + id SERIAL PRIMARY KEY, + title TEXT NOT NULL, + content TEXT + ); + """) + conn.commit() + + # Insert sample documents + cursor.execute(f""" + INSERT INTO {table_name} (title, content) VALUES + ('Introduction to Python', 'Python is a versatile programming language used for web development, data science, and automation.'), + ('JavaScript Basics', 'JavaScript is essential for front-end web development and can also be used on the server side with Node.js.'), + ('Database Design', 'Good database design is crucial for application performance and data integrity.'), + ('Machine Learning 101', 'Machine learning enables computers to learn from data without explicit programming.'), + ('Cloud Computing', 'Cloud computing provides on-demand access to computing resources over the internet.'); + """) + conn.commit() + + # Create search index + cursor.execute(f""" + CALL paradedb.create_bm25( + index_name => 
'{index_name}', + table_name => '{table_name}', + key_field => 'id', + text_fields => paradedb.field('title') || paradedb.field('content') + ); + """) + conn.commit() + + # Search for programming-related documents + cursor.execute(f""" + SELECT id, title, paradedb.score(id) as score + FROM {index_name}.search('content:programming') + ORDER BY score DESC; + """) + results = cursor.fetchall() + + assert len(results) >= 1 + # Python and Machine Learning docs should match + titles = [row[1] for row in results] + assert any("Python" in t or "Machine Learning" in t for t in titles) + + finally: + # Cleanup + cursor.execute(f"CALL paradedb.drop_bm25('{index_name}');") + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close() + + +def test_standard_postgres_features(): + """Test that standard PostgreSQL features work correctly.""" + conn = get_connection() + cursor = conn.cursor() + + table_name = f"users_{short_uid()}" + + try: + # Create table with various PostgreSQL types + cursor.execute(f""" + CREATE TABLE {table_name} ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + email VARCHAR(255) UNIQUE, + metadata JSONB, + tags TEXT[], + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + """) + conn.commit() + + # Insert data with JSONB and arrays + cursor.execute(f""" + INSERT INTO {table_name} (name, email, metadata, tags) + VALUES + ('Alice', 'alice@example.com', '{{"role": "admin", "level": 5}}', ARRAY['active', 'premium']), + ('Bob', 'bob@example.com', '{{"role": "user", "level": 2}}', ARRAY['active']), + ('Charlie', 'charlie@example.com', '{{"role": "user", "level": 3}}', ARRAY['inactive']); + """) + conn.commit() + + # Query with JSONB operators + cursor.execute(f""" + SELECT name FROM {table_name} + WHERE metadata->>'role' = 'admin'; + """) + results = cursor.fetchall() + assert len(results) == 1 + assert results[0][0] == "Alice" + + # Query with array operators + cursor.execute(f""" + SELECT name FROM 
{table_name} + WHERE 'premium' = ANY(tags); + """) + results = cursor.fetchall() + assert len(results) == 1 + assert results[0][0] == "Alice" + + finally: + # Cleanup + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close() From 824122ae3fbef5c97dc213af88090b925626d4c7 Mon Sep 17 00:00:00 2001 From: Waldemar Hummer Date: Tue, 27 Jan 2026 10:39:26 +0100 Subject: [PATCH 02/10] fix ParadeDB tests to use v2 API syntax Update tests to use the new ParadeDB v2 API: - Use CREATE INDEX ... USING bm25 instead of CALL paradedb.create_bm25() - Use ||| operator for disjunction (OR) search - Use &&& operator for conjunction (AND) search - Use pdb.score() for relevance scoring - Use DROP INDEX instead of paradedb.drop_bm25() Co-Authored-By: Claude Opus 4.5 --- paradedb/tests/test_extension.py | 109 +++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 27 deletions(-) diff --git a/paradedb/tests/test_extension.py b/paradedb/tests/test_extension.py index 043d461..1994e7c 100644 --- a/paradedb/tests/test_extension.py +++ b/paradedb/tests/test_extension.py @@ -80,7 +80,7 @@ def test_create_table_and_insert(): def test_paradedb_pg_search_extension(): - """Test ParadeDB's pg_search extension for full-text search.""" + """Test ParadeDB's pg_search extension for full-text search using v2 API.""" conn = get_connection() cursor = conn.cursor() @@ -109,31 +109,30 @@ def test_paradedb_pg_search_extension(): """) conn.commit() - # Create BM25 search index using ParadeDB + # Create BM25 search index using ParadeDB v2 API (CREATE INDEX syntax) cursor.execute(f""" - CALL paradedb.create_bm25( - index_name => '{index_name}', - table_name => '{table_name}', - key_field => 'id', - text_fields => paradedb.field('name') || paradedb.field('description') - ); + CREATE INDEX {index_name} ON {table_name} + USING bm25 (id, name, description) + WITH (key_field='id'); """) conn.commit() - # Search for products containing 'wireless' + # Search for 
products containing 'wireless' using ||| operator (disjunction/OR) cursor.execute(f""" SELECT id, name, description - FROM {index_name}.search('description:wireless'); + FROM {table_name} + WHERE description ||| 'wireless'; """) results = cursor.fetchall() assert len(results) >= 1 assert any("Headphones" in row[1] for row in results) - # Search for products containing 'laptop' + # Search for products containing 'laptop' in name or description cursor.execute(f""" SELECT id, name, description - FROM {index_name}.search('name:laptop OR description:laptop'); + FROM {table_name} + WHERE name ||| 'laptop' OR description ||| 'laptop'; """) results = cursor.fetchall() @@ -141,16 +140,16 @@ def test_paradedb_pg_search_extension(): assert any("Laptop" in row[1] for row in results) finally: - # Cleanup - cursor.execute(f"CALL paradedb.drop_bm25('{index_name}');") + # Cleanup - drop index first, then table + cursor.execute(f"DROP INDEX IF EXISTS {index_name};") cursor.execute(f"DROP TABLE IF EXISTS {table_name};") conn.commit() cursor.close() conn.close() -def test_paradedb_hybrid_search(): - """Test ParadeDB's hybrid search capabilities.""" +def test_paradedb_search_with_scoring(): + """Test ParadeDB's BM25 search with relevance scoring.""" conn = get_connection() cursor = conn.cursor() @@ -179,21 +178,19 @@ def test_paradedb_hybrid_search(): """) conn.commit() - # Create search index + # Create search index using v2 API cursor.execute(f""" - CALL paradedb.create_bm25( - index_name => '{index_name}', - table_name => '{table_name}', - key_field => 'id', - text_fields => paradedb.field('title') || paradedb.field('content') - ); + CREATE INDEX {index_name} ON {table_name} + USING bm25 (id, title, content) + WITH (key_field='id'); """) conn.commit() - # Search for programming-related documents + # Search for programming-related documents with scoring cursor.execute(f""" - SELECT id, title, paradedb.score(id) as score - FROM {index_name}.search('content:programming') + SELECT id, 
title, pdb.score(id) as score + FROM {table_name} + WHERE content ||| 'programming' ORDER BY score DESC; """) results = cursor.fetchall() @@ -205,7 +202,65 @@ def test_paradedb_hybrid_search(): finally: # Cleanup - cursor.execute(f"CALL paradedb.drop_bm25('{index_name}');") + cursor.execute(f"DROP INDEX IF EXISTS {index_name};") + cursor.execute(f"DROP TABLE IF EXISTS {table_name};") + conn.commit() + cursor.close() + conn.close() + + +def test_paradedb_conjunction_search(): + """Test ParadeDB's conjunction (AND) search using &&& operator.""" + conn = get_connection() + cursor = conn.cursor() + + table_name = f"items_{short_uid()}" + index_name = f"{table_name}_idx" + + try: + # Create table + cursor.execute(f""" + CREATE TABLE {table_name} ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT + ); + """) + conn.commit() + + # Insert sample data + cursor.execute(f""" + INSERT INTO {table_name} (name, description) VALUES + ('Running Shoes', 'Lightweight running shoes for marathon training'), + ('Walking Shoes', 'Comfortable walking shoes for daily use'), + ('Running Gear', 'Essential gear for running enthusiasts'), + ('Basketball Shoes', 'High-top basketball shoes with ankle support'); + """) + conn.commit() + + # Create BM25 index + cursor.execute(f""" + CREATE INDEX {index_name} ON {table_name} + USING bm25 (id, name, description) + WITH (key_field='id'); + """) + conn.commit() + + # Search using conjunction (AND) - must contain both 'running' AND 'shoes' + cursor.execute(f""" + SELECT id, name, description + FROM {table_name} + WHERE name &&& 'running shoes'; + """) + results = cursor.fetchall() + + assert len(results) >= 1 + # Should match "Running Shoes" but not "Running Gear" or "Walking Shoes" + assert any("Running Shoes" in row[1] for row in results) + + finally: + # Cleanup + cursor.execute(f"DROP INDEX IF EXISTS {index_name};") cursor.execute(f"DROP TABLE IF EXISTS {table_name};") conn.commit() cursor.close() From 
fcbe2684d8d4bcbda7b8456b478f1ebdea351bb9 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 18:25:26 +0000 Subject: [PATCH 03/10] [paradedb] Prefer more distinct connection credentials --- flake.nix | 22 ++++++++++++++++++++++ paradedb/README.md | 20 ++++++++++---------- paradedb/localstack_paradedb/extension.py | 6 +++--- paradedb/tests/test_extension.py | 6 +++--- 4 files changed, 38 insertions(+), 16 deletions(-) create mode 100644 flake.nix diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..edf2f11 --- /dev/null +++ b/flake.nix @@ -0,0 +1,22 @@ +{ + description = "localstack-extensions"; + + inputs = { + nixpkgs.url = "nixpkgs/nixpkgs-unstable"; + }; + + outputs = { self, nixpkgs }@inputs: + ( + let + forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.platforms.all; + in + { + devShell = forAllSystems (system: + let pkgs = import nixpkgs { inherit system; }; in + pkgs.mkShell { + buildInputs = with pkgs; [ uv python311 python311Packages.pip ty ]; + } + ); + } + ); +} diff --git a/paradedb/README.md b/paradedb/README.md index 21aa416..d45817a 100644 --- a/paradedb/README.md +++ b/paradedb/README.md @@ -13,13 +13,13 @@ Once the extension is running, you can connect to ParadeDB using any PostgreSQL - **Host**: `localhost` (or the Docker host if running in a container) - **Port**: `5432` (mapped from the container) -- **Database**: `postgres` -- **Username**: `postgres` -- **Password**: `postgres` +- **Database**: `mydatabase` +- **Username**: `myuser` +- **Password**: `mypassword` Example connection using `psql`: ```bash -psql -h localhost -p 5432 -U postgres -d postgres +psql -h localhost -p 5432 -U myuser -d mydatabase ``` Example connection using Python: @@ -29,9 +29,9 @@ import psycopg2 conn = psycopg2.connect( host="localhost", port=5432, - database="postgres", - user="postgres", - password="postgres" + database="mydatabase", + user="myuser", + password="mypassword" ) ``` @@ -68,9 +68,9 @@ SELECT * FROM 
products.search('description:electronics'); The following environment variables can be passed to the LocalStack container to configure the extension: -* `PARADEDB_POSTGRES_USER`: PostgreSQL username (default: `postgres`) -* `PARADEDB_POSTGRES_PASSWORD`: PostgreSQL password (default: `postgres`) -* `PARADEDB_POSTGRES_DB`: Default database name (default: `postgres`) +* `PARADEDB_POSTGRES_USER`: PostgreSQL username (default: `myuser`) +* `PARADEDB_POSTGRES_PASSWORD`: PostgreSQL password (default: `mypassword`) +* `PARADEDB_POSTGRES_DB`: Default database name (default: `mydatabase`) ## Prerequisites diff --git a/paradedb/localstack_paradedb/extension.py b/paradedb/localstack_paradedb/extension.py index d1d60b0..40f3221 100644 --- a/paradedb/localstack_paradedb/extension.py +++ b/paradedb/localstack_paradedb/extension.py @@ -11,9 +11,9 @@ ENV_POSTGRES_DB = "PARADEDB_POSTGRES_DB" # Default values -DEFAULT_POSTGRES_USER = "postgres" -DEFAULT_POSTGRES_PASSWORD = "postgres" -DEFAULT_POSTGRES_DB = "postgres" +DEFAULT_POSTGRES_USER = "myuser" +DEFAULT_POSTGRES_PASSWORD = "mypassword" +DEFAULT_POSTGRES_DB = "mydatabase" class ParadeDbExtension(DatabaseDockerContainerExtension): diff --git a/paradedb/tests/test_extension.py b/paradedb/tests/test_extension.py index 1994e7c..06fa38f 100644 --- a/paradedb/tests/test_extension.py +++ b/paradedb/tests/test_extension.py @@ -5,9 +5,9 @@ # Connection details for ParadeDB HOST = "localhost" PORT = 5432 -USER = "postgres" -PASSWORD = "postgres" -DATABASE = "postgres" +USER = "myuser" +PASSWORD = "mypassword" +DATABASE = "mydatabase" def get_connection(): From c86df6b6b5e72f8086c1fb6ee44b0d254ef8dc48 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 18:26:47 +0000 Subject: [PATCH 04/10] [paradedb] Also allow port to be overridden This can be helpful to avoid clashing with another locally-running postgres, for example. 
--- paradedb/localstack_paradedb/extension.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/paradedb/localstack_paradedb/extension.py b/paradedb/localstack_paradedb/extension.py index 40f3221..4191270 100644 --- a/paradedb/localstack_paradedb/extension.py +++ b/paradedb/localstack_paradedb/extension.py @@ -9,11 +9,13 @@ ENV_POSTGRES_USER = "PARADEDB_POSTGRES_USER" ENV_POSTGRES_PASSWORD = "PARADEDB_POSTGRES_PASSWORD" ENV_POSTGRES_DB = "PARADEDB_POSTGRES_DB" +ENV_POSTGRES_PORT = "PARADEDB_POSTGRES_PORT" # Default values DEFAULT_POSTGRES_USER = "myuser" DEFAULT_POSTGRES_PASSWORD = "mypassword" DEFAULT_POSTGRES_DB = "mydatabase" +DEFAULT_POSTGRES_PORT = 5432 class ParadeDbExtension(DatabaseDockerContainerExtension): @@ -21,14 +23,13 @@ class ParadeDbExtension(DatabaseDockerContainerExtension): # Name of the Docker image to spin up DOCKER_IMAGE = "paradedb/paradedb" - # Default port for PostgreSQL - POSTGRES_PORT = 5432 def __init__(self): # Get configuration from environment variables postgres_user = os.environ.get(ENV_POSTGRES_USER, DEFAULT_POSTGRES_USER) postgres_password = os.environ.get(ENV_POSTGRES_PASSWORD, DEFAULT_POSTGRES_PASSWORD) postgres_db = os.environ.get(ENV_POSTGRES_DB, DEFAULT_POSTGRES_DB) + postgres_port = int(os.environ.get(ENV_POSTGRES_PORT, DEFAULT_POSTGRES_PORT)) # Environment variables to pass to the container env_vars = { @@ -39,7 +40,7 @@ def __init__(self): super().__init__( image_name=self.DOCKER_IMAGE, - container_ports=[self.POSTGRES_PORT], + container_ports=[postgres_port], env_vars=env_vars, ) @@ -47,6 +48,7 @@ def __init__(self): self.postgres_user = postgres_user self.postgres_password = postgres_password self.postgres_db = postgres_db + self.postgres_port = postgres_port def get_connection_info(self) -> dict: """Return connection information for ParadeDB.""" @@ -55,10 +57,10 @@ def get_connection_info(self) -> dict: "database": self.postgres_db, "user": self.postgres_user, "password": 
self.postgres_password, - "port": self.POSTGRES_PORT, + "port": self.postgres_port, "connection_string": ( f"postgresql://{self.postgres_user}:{self.postgres_password}" - f"@{self.container_host}:{self.POSTGRES_PORT}/{self.postgres_db}" + f"@{self.container_host}:{self.postgres_port}/{self.postgres_db}" ), }) return info From 5ed49e50417ff987ba02a91d28bc6018e0a7a60b Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 18:28:18 +0000 Subject: [PATCH 05/10] [paradedb] Additional project keywords --- paradedb/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paradedb/pyproject.toml b/paradedb/pyproject.toml index d21cbc4..291d857 100644 --- a/paradedb/pyproject.toml +++ b/paradedb/pyproject.toml @@ -11,7 +11,7 @@ requires-python = ">=3.10" authors = [ { name = "LocalStack team"} ] -keywords = ["LocalStack", "ParadeDB", "PostgreSQL"] +keywords = ["LocalStack", "ParadeDB", "PostgreSQL", "Search", "Analytics"] classifiers = [] dependencies = [] From b2552070e58e989eef92a1545a73459ba029737c Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 18:29:49 +0000 Subject: [PATCH 06/10] [paradedb] Remove hallucinated project description --- paradedb/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/paradedb/README.md b/paradedb/README.md index d45817a..496ab7f 100644 --- a/paradedb/README.md +++ b/paradedb/README.md @@ -37,11 +37,8 @@ conn = psycopg2.connect( ## ParadeDB Features -ParadeDB includes several powerful extensions: - -- **pg_search**: Full-text search with BM25 ranking -- **pg_analytics**: DuckDB-powered analytics for OLAP workloads -- **pg_lakehouse**: Query data lakes (S3, Delta Lake, Iceberg) directly +ParadeDB includes the **pg_search** extension, for both search and +analytics workloads. 
Example using pg_search: ```sql From 535795daa3def9dad065e4dbcc07875ccc804ce6 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 18:37:19 +0000 Subject: [PATCH 07/10] [paradedb] Fix hallucinated example --- paradedb/README.md | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/paradedb/README.md b/paradedb/README.md index 496ab7f..46163b6 100644 --- a/paradedb/README.md +++ b/paradedb/README.md @@ -40,25 +40,23 @@ conn = psycopg2.connect( ParadeDB includes the **pg_search** extension, for both search and analytics workloads. -Example using pg_search: +Example of BM25 scoring, from the excellent [quickstart guide](https://docs.paradedb.com/documentation/getting-started/quickstart): + ```sql --- Create a table with search index -CREATE TABLE products ( - id SERIAL PRIMARY KEY, - name TEXT, - description TEXT +CALL paradedb.create_bm25_test_table( + schema_name => 'public', + table_name => 'mock_items' ); --- Create a BM25 search index -CALL paradedb.create_bm25( - index_name => 'products_idx', - table_name => 'products', - key_field => 'id', - text_fields => paradedb.field('name') || paradedb.field('description') -); +CREATE INDEX search_idx ON mock_items +USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range) +WITH (key_field='id'); --- Search with BM25 scoring -SELECT * FROM products.search('description:electronics'); +SELECT description, pdb.score(id) +FROM mock_items +WHERE description ||| 'running shoes' AND rating > 2 +ORDER BY score DESC +LIMIT 5; ``` ## Configuration From a7aa9747dc6bdf555c01ab611c7250cc99dcf90f Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 19:01:08 +0000 Subject: [PATCH 08/10] [paradedb] Just use the quickstart example as a test No need to exhaustively verify or showcase different aspects of paradedb here. 
--- paradedb/tests/test_extension.py | 289 ++++--------------------------- 1 file changed, 35 insertions(+), 254 deletions(-) diff --git a/paradedb/tests/test_extension.py b/paradedb/tests/test_extension.py index 06fa38f..bd1277e 100644 --- a/paradedb/tests/test_extension.py +++ b/paradedb/tests/test_extension.py @@ -35,290 +35,71 @@ def test_connect_to_paradedb(): conn.close() -def test_create_table_and_insert(): - """Test creating a table and inserting data.""" +def test_paradedb_quickstart(): + """Test some of ParadeDB's quickstart examples.""" conn = get_connection() cursor = conn.cursor() - table_name = f"test_table_{short_uid()}" - - try: - # Create table - cursor.execute(f""" - CREATE TABLE {table_name} ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT - ); - """) - conn.commit() - - # Insert data - cursor.execute(f""" - INSERT INTO {table_name} (name, description) - VALUES ('Product A', 'A great product'), - ('Product B', 'Another product'), - ('Product C', 'Yet another product'); - """) - conn.commit() - - # Query data - cursor.execute(f"SELECT * FROM {table_name} ORDER BY id;") - results = cursor.fetchall() - - assert len(results) == 3 - assert results[0][1] == "Product A" - assert results[1][1] == "Product B" - assert results[2][1] == "Product C" - - finally: - # Cleanup - cursor.execute(f"DROP TABLE IF EXISTS {table_name};") - conn.commit() - cursor.close() - conn.close() - - -def test_paradedb_pg_search_extension(): - """Test ParadeDB's pg_search extension for full-text search using v2 API.""" - conn = get_connection() - cursor = conn.cursor() - - table_name = f"products_{short_uid()}" + table_name = f"mock_items_{short_uid()}" index_name = f"{table_name}_idx" try: - # Create table + # Load sample data cursor.execute(f""" - CREATE TABLE {table_name} ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT + CALL paradedb.create_bm25_test_table( + schema_name => 'public', + table_name => '{table_name}' ); """) - 
conn.commit() - - # Insert sample data - cursor.execute(f""" - INSERT INTO {table_name} (name, description) VALUES - ('Laptop', 'High performance laptop with 16GB RAM and SSD storage'), - ('Smartphone', 'Latest smartphone with advanced camera features'), - ('Headphones', 'Wireless noise-canceling headphones for music lovers'), - ('Tablet', 'Portable tablet with retina display'), - ('Smartwatch', 'Fitness tracking smartwatch with heart rate monitor'); - """) - conn.commit() - # Create BM25 search index using ParadeDB v2 API (CREATE INDEX syntax) + # Create search index cursor.execute(f""" - CREATE INDEX {index_name} ON {table_name} - USING bm25 (id, name, description) + CREATE INDEX search_idx ON {table_name} + USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range) WITH (key_field='id'); """) - conn.commit() - - # Search for products containing 'wireless' using ||| operator (disjunction/OR) - cursor.execute(f""" - SELECT id, name, description - FROM {table_name} - WHERE description ||| 'wireless'; - """) - results = cursor.fetchall() - - assert len(results) >= 1 - assert any("Headphones" in row[1] for row in results) - # Search for products containing 'laptop' in name or description cursor.execute(f""" - SELECT id, name, description - FROM {table_name} - WHERE name ||| 'laptop' OR description ||| 'laptop'; + SELECT description, rating, category + FROM {table_name} + LIMIT 3; """) results = cursor.fetchall() + assert results == [ + ("Ergonomic metal keyboard", 4, "Electronics"), + ("Plastic Keyboard", 4, "Electronics"), + ("Sleek running shoes", 5, "Footwear"), + ] - assert len(results) >= 1 - assert any("Laptop" in row[1] for row in results) - - finally: - # Cleanup - drop index first, then table - cursor.execute(f"DROP INDEX IF EXISTS {index_name};") - cursor.execute(f"DROP TABLE IF EXISTS {table_name};") - conn.commit() - cursor.close() - conn.close() - - -def test_paradedb_search_with_scoring(): - """Test ParadeDB's BM25 
search with relevance scoring.""" - conn = get_connection() - cursor = conn.cursor() - - table_name = f"docs_{short_uid()}" - index_name = f"{table_name}_idx" - - try: - # Create table with text content - cursor.execute(f""" - CREATE TABLE {table_name} ( - id SERIAL PRIMARY KEY, - title TEXT NOT NULL, - content TEXT - ); - """) - conn.commit() - - # Insert sample documents - cursor.execute(f""" - INSERT INTO {table_name} (title, content) VALUES - ('Introduction to Python', 'Python is a versatile programming language used for web development, data science, and automation.'), - ('JavaScript Basics', 'JavaScript is essential for front-end web development and can also be used on the server side with Node.js.'), - ('Database Design', 'Good database design is crucial for application performance and data integrity.'), - ('Machine Learning 101', 'Machine learning enables computers to learn from data without explicit programming.'), - ('Cloud Computing', 'Cloud computing provides on-demand access to computing resources over the internet.'); - """) - conn.commit() - - # Create search index using v2 API - cursor.execute(f""" - CREATE INDEX {index_name} ON {table_name} - USING bm25 (id, title, content) - WITH (key_field='id'); - """) - conn.commit() - - # Search for programming-related documents with scoring + # Match conjunction cursor.execute(f""" - SELECT id, title, pdb.score(id) as score + SELECT description, rating, category FROM {table_name} - WHERE content ||| 'programming' - ORDER BY score DESC; + WHERE description &&& 'running shoes' AND rating > 2 + ORDER BY rating + LIMIT 5; """) results = cursor.fetchall() + assert results == [("Sleek running shoes", 5, "Footwear")] - assert len(results) >= 1 - # Python and Machine Learning docs should match - titles = [row[1] for row in results] - assert any("Python" in t or "Machine Learning" in t for t in titles) - - finally: - # Cleanup - cursor.execute(f"DROP INDEX IF EXISTS {index_name};") - cursor.execute(f"DROP TABLE IF 
EXISTS {table_name};") - conn.commit() - cursor.close() - conn.close() - - -def test_paradedb_conjunction_search(): - """Test ParadeDB's conjunction (AND) search using &&& operator.""" - conn = get_connection() - cursor = conn.cursor() - - table_name = f"items_{short_uid()}" - index_name = f"{table_name}_idx" - - try: - # Create table - cursor.execute(f""" - CREATE TABLE {table_name} ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT - ); - """) - conn.commit() - - # Insert sample data - cursor.execute(f""" - INSERT INTO {table_name} (name, description) VALUES - ('Running Shoes', 'Lightweight running shoes for marathon training'), - ('Walking Shoes', 'Comfortable walking shoes for daily use'), - ('Running Gear', 'Essential gear for running enthusiasts'), - ('Basketball Shoes', 'High-top basketball shoes with ankle support'); - """) - conn.commit() - - # Create BM25 index - cursor.execute(f""" - CREATE INDEX {index_name} ON {table_name} - USING bm25 (id, name, description) - WITH (key_field='id'); - """) - conn.commit() - - # Search using conjunction (AND) - must contain both 'running' AND 'shoes' + # BM25 scoring cursor.execute(f""" - SELECT id, name, description + SELECT description, pdb.score(id) FROM {table_name} - WHERE name &&& 'running shoes'; + WHERE description ||| 'running shoes' AND rating > 2 + ORDER BY score DESC + LIMIT 5; """) results = cursor.fetchall() - - assert len(results) >= 1 - # Should match "Running Shoes" but not "Running Gear" or "Walking Shoes" - assert any("Running Shoes" in row[1] for row in results) - + assert results == [ + ("Sleek running shoes", 6.817111), + ("Generic shoes", 3.8772602), + ("White jogging shoes", 3.4849067), + ] finally: - # Cleanup + # Cleanup - drop index first, then table cursor.execute(f"DROP INDEX IF EXISTS {index_name};") cursor.execute(f"DROP TABLE IF EXISTS {table_name};") conn.commit() cursor.close() conn.close() - - -def test_standard_postgres_features(): - """Test that standard PostgreSQL 
features work correctly.""" - conn = get_connection() - cursor = conn.cursor() - - table_name = f"users_{short_uid()}" - - try: - # Create table with various PostgreSQL types - cursor.execute(f""" - CREATE TABLE {table_name} ( - id SERIAL PRIMARY KEY, - name VARCHAR(100) NOT NULL, - email VARCHAR(255) UNIQUE, - metadata JSONB, - tags TEXT[], - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ); - """) - conn.commit() - - # Insert data with JSONB and arrays - cursor.execute(f""" - INSERT INTO {table_name} (name, email, metadata, tags) - VALUES - ('Alice', 'alice@example.com', '{{"role": "admin", "level": 5}}', ARRAY['active', 'premium']), - ('Bob', 'bob@example.com', '{{"role": "user", "level": 2}}', ARRAY['active']), - ('Charlie', 'charlie@example.com', '{{"role": "user", "level": 3}}', ARRAY['inactive']); - """) - conn.commit() - - # Query with JSONB operators - cursor.execute(f""" - SELECT name FROM {table_name} - WHERE metadata->>'role' = 'admin'; - """) - results = cursor.fetchall() - assert len(results) == 1 - assert results[0][0] == "Alice" - - # Query with array operators - cursor.execute(f""" - SELECT name FROM {table_name} - WHERE 'premium' = ANY(tags); - """) - results = cursor.fetchall() - assert len(results) == 1 - assert results[0][0] == "Alice" - - finally: - # Cleanup - cursor.execute(f"DROP TABLE IF EXISTS {table_name};") - conn.commit() - cursor.close() - conn.close() From 6c8686fa3c0b7686f90a80586ab65b538818cb68 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 19:07:49 +0000 Subject: [PATCH 09/10] [paradedb] Apply formatter --- paradedb/localstack_paradedb/extension.py | 26 +++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/paradedb/localstack_paradedb/extension.py b/paradedb/localstack_paradedb/extension.py index 4191270..845dbbc 100644 --- a/paradedb/localstack_paradedb/extension.py +++ b/paradedb/localstack_paradedb/extension.py @@ -27,7 +27,9 @@ class 
ParadeDbExtension(DatabaseDockerContainerExtension): def __init__(self): # Get configuration from environment variables postgres_user = os.environ.get(ENV_POSTGRES_USER, DEFAULT_POSTGRES_USER) - postgres_password = os.environ.get(ENV_POSTGRES_PASSWORD, DEFAULT_POSTGRES_PASSWORD) + postgres_password = os.environ.get( + ENV_POSTGRES_PASSWORD, DEFAULT_POSTGRES_PASSWORD + ) postgres_db = os.environ.get(ENV_POSTGRES_DB, DEFAULT_POSTGRES_DB) postgres_port = int(os.environ.get(ENV_POSTGRES_PORT, DEFAULT_POSTGRES_PORT)) @@ -53,14 +55,16 @@ def __init__(self): def get_connection_info(self) -> dict: """Return connection information for ParadeDB.""" info = super().get_connection_info() - info.update({ - "database": self.postgres_db, - "user": self.postgres_user, - "password": self.postgres_password, - "port": self.postgres_port, - "connection_string": ( - f"postgresql://{self.postgres_user}:{self.postgres_password}" - f"@{self.container_host}:{self.postgres_port}/{self.postgres_db}" - ), - }) + info.update( + { + "database": self.postgres_db, + "user": self.postgres_user, + "password": self.postgres_password, + "port": self.postgres_port, + "connection_string": ( + f"postgresql://{self.postgres_user}:{self.postgres_password}" + f"@{self.container_host}:{self.postgres_port}/{self.postgres_db}" + ), + } + ) return info From c0a9665fa8b6b3eb40903341fc175595ffd6f953 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Wed, 28 Jan 2026 19:15:17 +0000 Subject: [PATCH 10/10] [paradedb] Add to main list of available extensions --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6cf4345..123d77d 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ You can install the respective extension by calling `localstack extensions insta | [Stripe](https://github.com/localstack/localstack-extensions/tree/main/stripe) | localstack-extension-stripe | 0.2.0 | Stable | | [Terraform 
Init](https://github.com/localstack/localstack-extensions/tree/main/terraform-init) | localstack-extension-terraform-init | 0.2.0 | Experimental | | [TypeDB](https://github.com/localstack/localstack-extensions/tree/main/typedb) | localstack-extension-typedb | 0.1.3 | Experimental | +| [ParadeDB](https://github.com/localstack/localstack-extensions/tree/main/paradedb) | localstack-extension-paradedb | 0.1.0 | Experimental | ## Developing Extensions