-
Notifications
You must be signed in to change notification settings - Fork 74
feat: add serialization helpers for SearchIndex and AsyncSearchIndex #525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,13 @@ | ||
| import asyncio | ||
| import json | ||
| import os | ||
| import threading | ||
| import time | ||
| import warnings | ||
| import weakref | ||
| from urllib.parse import urlparse, urlunparse | ||
|
|
||
| import yaml | ||
| from math import ceil | ||
| from typing import ( | ||
| TYPE_CHECKING, | ||
|
|
@@ -1294,6 +1298,69 @@ def info(self, name: Optional[str] = None) -> Dict[str, Any]: | |
| index_name = name or self.schema.index.name | ||
| return self._info(index_name, self._redis_client) | ||
|
|
||
| def _sanitize_redis_url(self, url: Optional[str]) -> Optional[str]: | ||
| """Remove password from Redis URL for safe serialization.""" | ||
| if url is None: | ||
| return None | ||
| parsed = urlparse(url) | ||
| # Replace password with **** | ||
| netloc = parsed.hostname or "" | ||
| if parsed.port: | ||
| netloc = f"{netloc}:{parsed.port}" | ||
| if parsed.username: | ||
| netloc = f"{parsed.username}:****@{netloc}" | ||
| return urlunparse((parsed.scheme, netloc, parsed.path, "", "", "")) | ||
|
|
||
| def to_dict(self, include_connection: bool = False) -> Dict[str, Any]: | ||
| """Serialize the index configuration to a dictionary. | ||
|
|
||
| Args: | ||
| include_connection (bool, optional): Whether to include connection | ||
| parameters. Defaults to False for security (passwords/URLs | ||
| are excluded by default). | ||
|
|
||
| Returns: | ||
| Dict[str, Any]: Dictionary representation of the index configuration. | ||
|
|
||
| Example: | ||
| >>> config = index.to_dict() | ||
| >>> new_index = SearchIndex.from_dict(config) | ||
| """ | ||
| config = self.schema.to_dict() | ||
| if include_connection: | ||
| # Sanitize URL to remove password | ||
| sanitized_url = self._sanitize_redis_url(self._redis_url) | ||
| if sanitized_url is not None: | ||
| config["_redis_url"] = sanitized_url | ||
| # Only include non-sensitive connection kwargs | ||
| safe_keys = {"decode_responses", "ssl", "socket_timeout", "socket_connect_timeout"} | ||
| safe_kwargs = { | ||
| k: v for k, v in self._connection_kwargs.items() | ||
| if k in safe_keys | ||
| } | ||
| if safe_kwargs: | ||
| config["_connection_kwargs"] = safe_kwargs | ||
| return config | ||
|
Comment on lines
+1329
to
+1343
|
||
|
|
||
| def to_yaml(self, path: str, include_connection: bool = False, overwrite: bool = True) -> None: | ||
| """Serialize the index configuration to a YAML file. | ||
|
|
||
| Args: | ||
| path (str): Path to write the YAML file. | ||
| include_connection (bool, optional): Whether to include connection | ||
| parameters. Defaults to False for security. | ||
| overwrite (bool, optional): Whether to overwrite existing file. | ||
| Defaults to True. If False and file exists, raises FileExistsError. | ||
|
|
||
| Example: | ||
| >>> index.to_yaml("schemas/my_index.yaml") | ||
| """ | ||
| if not overwrite and os.path.exists(path): | ||
| raise FileExistsError(f"File already exists: {path}") | ||
| config = self.to_dict(include_connection=include_connection) | ||
| with open(path, "w") as f: | ||
| yaml.dump(config, f, default_flow_style=False, sort_keys=False) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Identical serialization methods duplicated across both index classesLow Severity
Additional Locations (1) |
||
|
|
||
| def __enter__(self): | ||
| return self | ||
|
|
||
|
|
@@ -2251,6 +2318,56 @@ def disconnect_sync(self): | |
| return | ||
| sync_wrapper(self.disconnect)() | ||
|
|
||
| def to_dict(self, include_connection: bool = False) -> Dict[str, Any]: | ||
| """Serialize the index configuration to a dictionary. | ||
|
|
||
| Args: | ||
| include_connection (bool, optional): Whether to include connection | ||
| parameters. Defaults to False for security (passwords/URLs | ||
| are excluded by default). | ||
|
|
||
| Returns: | ||
| Dict[str, Any]: Dictionary representation of the index configuration. | ||
|
|
||
| Example: | ||
| >>> config = index.to_dict() | ||
| >>> new_index = AsyncSearchIndex.from_dict(config) | ||
| """ | ||
| config = self.schema.to_dict() | ||
| if include_connection: | ||
| # Sanitize URL to remove password | ||
| sanitized_url = self._sanitize_redis_url(self._redis_url) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| if sanitized_url is not None: | ||
| config["_redis_url"] = sanitized_url | ||
| # Only include non-sensitive connection kwargs | ||
| safe_keys = {"decode_responses", "ssl", "socket_timeout", "socket_connect_timeout"} | ||
| safe_kwargs = { | ||
| k: v for k, v in self._connection_kwargs.items() | ||
| if k in safe_keys | ||
| } | ||
| if safe_kwargs: | ||
| config["_connection_kwargs"] = safe_kwargs | ||
| return config | ||
|
|
||
| def to_yaml(self, path: str, include_connection: bool = False, overwrite: bool = True) -> None: | ||
| """Serialize the index configuration to a YAML file. | ||
|
|
||
| Args: | ||
| path (str): Path to write the YAML file. | ||
| include_connection (bool, optional): Whether to include connection | ||
| parameters. Defaults to False for security. | ||
| overwrite (bool, optional): Whether to overwrite existing file. | ||
| Defaults to True. If False and file exists, raises FileExistsError. | ||
|
|
||
| Example: | ||
| >>> index.to_yaml("schemas/my_index.yaml") | ||
| """ | ||
| if not overwrite and os.path.exists(path): | ||
| raise FileExistsError(f"File already exists: {path}") | ||
| config = self.to_dict(include_connection=include_connection) | ||
| with open(path, "w") as f: | ||
| yaml.dump(config, f, default_flow_style=False, sort_keys=False) | ||
|
Comment on lines
+2321
to
+2369
|
||
|
|
||
| async def __aenter__(self): | ||
| return self | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,166 @@ | ||
| """Tests for SearchIndex serialization helpers.""" | ||
| import tempfile | ||
| from pathlib import Path | ||
|
|
||
| import pytest | ||
|
|
||
| from redisvl.index import AsyncSearchIndex, SearchIndex | ||
| from redisvl.schema import IndexSchema | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def sample_schema(): | ||
| """Create a sample schema for testing.""" | ||
| return IndexSchema.from_dict({ | ||
| "index": { | ||
| "name": "test_index", | ||
| "prefix": "test:", | ||
| "storage_type": "hash", | ||
| }, | ||
| "fields": [ | ||
| {"name": "text", "type": "text"}, | ||
| {"name": "vector", "type": "vector", "attrs": {"dims": 128, "algorithm": "flat"}}, | ||
| ] | ||
| }) | ||
|
|
||
|
|
||
| class TestSearchIndexSerialization: | ||
| """Tests for SearchIndex serialization methods.""" | ||
|
|
||
| def test_to_dict_without_connection(self, sample_schema): | ||
| """Test to_dict() excludes connection info by default.""" | ||
| index = SearchIndex( | ||
| schema=sample_schema, | ||
| redis_url="redis://localhost:6379", | ||
| ) | ||
|
|
||
| config = index.to_dict() | ||
|
|
||
| assert "index" in config | ||
| assert config["index"]["name"] == "test_index" | ||
| assert "_redis_url" not in config | ||
| assert "_connection_kwargs" not in config | ||
|
|
||
| def test_to_dict_with_connection(self, sample_schema): | ||
| """Test to_dict() includes connection info when requested.""" | ||
| index = SearchIndex( | ||
| schema=sample_schema, | ||
| redis_url="redis://localhost:6379", | ||
| connection_kwargs={"ssl": True, "socket_timeout": 30, "password": "secret"}, | ||
| ) | ||
|
|
||
| config = index.to_dict(include_connection=True) | ||
|
|
||
| assert "_redis_url" in config | ||
| assert config["_redis_url"] == "redis://localhost:6379" | ||
| # Password should not be included (not in safe_keys) | ||
| assert "password" not in config.get("_connection_kwargs", {}) | ||
| # Safe keys should be included | ||
| assert config["_connection_kwargs"]["ssl"] is True | ||
| assert config["_connection_kwargs"]["socket_timeout"] == 30 | ||
|
|
||
| def test_to_yaml_without_connection(self, sample_schema): | ||
| """Test to_yaml() writes schema to YAML file.""" | ||
| index = SearchIndex(schema=sample_schema) | ||
|
|
||
| with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: | ||
| path = f.name | ||
|
|
||
| try: | ||
| index.to_yaml(path) | ||
|
|
||
| content = Path(path).read_text() | ||
| assert "test_index" in content | ||
| assert "text" in content | ||
| assert "vector" in content | ||
| finally: | ||
| Path(path).unlink() | ||
|
|
||
| def test_to_yaml_with_connection(self, sample_schema): | ||
| """Test to_yaml() includes connection when requested.""" | ||
| index = SearchIndex( | ||
| schema=sample_schema, | ||
| redis_url="redis://localhost:6379", | ||
| ) | ||
|
|
||
| with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: | ||
| path = f.name | ||
|
|
||
| try: | ||
| index.to_yaml(path, include_connection=True) | ||
|
|
||
| content = Path(path).read_text() | ||
| assert "_redis_url" in content | ||
| finally: | ||
| Path(path).unlink() | ||
|
|
||
| def test_roundtrip_dict(self, sample_schema): | ||
| """Test that to_dict() and from_dict() roundtrip correctly.""" | ||
| original_index = SearchIndex( | ||
| schema=sample_schema, | ||
| connection_kwargs={"ssl": True}, | ||
| ) | ||
|
|
||
| config = original_index.to_dict() | ||
| restored_index = SearchIndex.from_dict(config) | ||
|
|
||
| assert restored_index.schema.index.name == original_index.schema.index.name | ||
| assert restored_index.schema.index.prefix == original_index.schema.index.prefix | ||
|
Comment on lines
+97
to
+108
|
||
|
|
||
|
|
||
| class TestAsyncSearchIndexSerialization: | ||
| """Tests for AsyncSearchIndex serialization methods.""" | ||
|
|
||
| def test_to_dict_without_connection(self, sample_schema): | ||
| """Test to_dict() excludes connection info by default.""" | ||
| index = AsyncSearchIndex( | ||
| schema=sample_schema, | ||
| redis_url="redis://localhost:6379", | ||
| ) | ||
|
|
||
| config = index.to_dict() | ||
|
|
||
| assert "index" in config | ||
| assert config["index"]["name"] == "test_index" | ||
| assert "_redis_url" not in config | ||
|
|
||
| def test_to_dict_with_connection(self, sample_schema): | ||
| """Test to_dict() includes connection info when requested.""" | ||
| index = AsyncSearchIndex( | ||
| schema=sample_schema, | ||
| redis_url="redis://localhost:6379", | ||
| connection_kwargs={"ssl": True, "password": "secret"}, | ||
| ) | ||
|
|
||
| config = index.to_dict(include_connection=True) | ||
|
|
||
| assert "_redis_url" in config | ||
| # Password should not be included (not in safe_keys) | ||
| assert "password" not in config.get("_connection_kwargs", {}) | ||
|
|
||
| def test_to_yaml(self, sample_schema): | ||
| """Test to_yaml() writes schema to YAML file.""" | ||
| index = AsyncSearchIndex(schema=sample_schema) | ||
|
|
||
| with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: | ||
| path = f.name | ||
|
|
||
| try: | ||
| index.to_yaml(path) | ||
|
|
||
| content = Path(path).read_text() | ||
| assert "test_index" in content | ||
| finally: | ||
| Path(path).unlink() | ||
|
|
||
| def test_roundtrip_dict(self, sample_schema): | ||
| """Test that to_dict() and from_dict() roundtrip correctly.""" | ||
| original_index = AsyncSearchIndex( | ||
| schema=sample_schema, | ||
| connection_kwargs={"ssl": True}, | ||
| ) | ||
|
|
||
| config = original_index.to_dict() | ||
| restored_index = AsyncSearchIndex.from_dict(config) | ||
|
|
||
| assert restored_index.schema.index.name == original_index.schema.index.name | ||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Password-only Redis URLs lose all auth indication silently
Medium Severity
_sanitize_redis_urlonly adds the****password mask whenparsed.usernameis truthy. The most common Redis auth URL pattern isredis://:password@localhost:6379, whereparsed.usernameis an empty string (falsy). This causes the entire auth section to be silently dropped, producingredis://localhost:6379— with no****indicator that authentication is configured. The condition needs to also checkparsed.passwordso password-only URLs are properly sanitized.