diff --git a/.github/wordlist.txt b/.github/wordlist.txt index d81fd19c..6885f25f 100644 --- a/.github/wordlist.txt +++ b/.github/wordlist.txt @@ -111,4 +111,13 @@ ulid coroutine compat programmatically -uv \ No newline at end of file +uv +RedisVL +embeddings +VectorQuery +VectorRangeQuery +SearchIndex +ADHOC +EF +DSL +DSL \ No newline at end of file diff --git a/aredis_om/redisvl.py b/aredis_om/redisvl.py new file mode 100644 index 00000000..4ab016b3 --- /dev/null +++ b/aredis_om/redisvl.py @@ -0,0 +1,237 @@ +""" +RedisVL integration for Redis OM. + +This module provides utilities to convert Redis OM models to RedisVL schemas, +enabling advanced vector search capabilities through RedisVL. + +Example: + from redis_om import JsonModel, Field, VectorFieldOptions + from aredis_om.redisvl import to_redisvl_schema, get_redisvl_index + + class Document(JsonModel, index=True): + title: str = Field(index=True) + embedding: list[float] = Field( + vector_options=VectorFieldOptions.flat( + type=VectorFieldOptions.TYPE.FLOAT32, + dimension=384, + distance_metric=VectorFieldOptions.DISTANCE_METRIC.COSINE, + ) + ) + + # Get a RedisVL IndexSchema for advanced operations + schema = to_redisvl_schema(Document) + + # Or get a ready-to-use SearchIndex + index = get_redisvl_index(Document) + results = await index.query(VectorQuery(...)) +""" + +from typing import Any, Dict, List, Optional, Type, Union + +from redisvl.index import AsyncSearchIndex, SearchIndex +from redisvl.schema import IndexSchema + +from .model.model import ( + FieldInfo, + JsonModel, + RedisModel, + VectorFieldOptions, + get_outer_type, + is_numeric_type, + is_supported_container_type, + should_index_field, +) + + +def _get_field_type( + field_name: str, + field_type: Any, + field_info: FieldInfo, + is_json: bool, +) -> Optional[Dict[str, Any]]: + """Convert an OM field to a RedisVL field definition.""" + if not should_index_field(field_info): + return None + + vector_options: Optional[VectorFieldOptions] = 
getattr( + field_info, "vector_options", None + ) + sortable = getattr(field_info, "sortable", False) is True + full_text_search = getattr(field_info, "full_text_search", False) is True + case_sensitive = getattr(field_info, "case_sensitive", False) is True + + # Vector field + if vector_options: + attrs = { + "dims": vector_options.dimension, + "distance_metric": vector_options.distance_metric.name.lower(), + "algorithm": vector_options.algorithm.name.lower(), + "datatype": vector_options.type.name.lower(), + } + if vector_options.initial_cap: + attrs["initial_cap"] = vector_options.initial_cap + is_flat = vector_options.algorithm.name == "FLAT" + if is_flat and vector_options.block_size: + attrs["block_size"] = vector_options.block_size + if vector_options.algorithm.name == "HNSW": + if vector_options.m: + attrs["m"] = vector_options.m + if vector_options.ef_construction: + attrs["ef_construction"] = vector_options.ef_construction + if vector_options.ef_runtime: + attrs["ef_runtime"] = vector_options.ef_runtime + if vector_options.epsilon: + attrs["epsilon"] = vector_options.epsilon + return {"name": field_name, "type": "vector", "attrs": attrs} + + # Numeric field + if is_numeric_type(field_type): + attrs = {"sortable": sortable} + return {"name": field_name, "type": "numeric", "attrs": attrs} + + # Boolean - stored as TAG + if field_type is bool: + return {"name": field_name, "type": "tag"} + + # String field + if isinstance(field_type, type) and issubclass(field_type, str): + if full_text_search: + attrs = {"sortable": sortable} + return {"name": field_name, "type": "text", "attrs": attrs} + else: + attrs = {"sortable": sortable, "case_sensitive": case_sensitive} + return {"name": field_name, "type": "tag", "attrs": attrs} + + # List of strings -> TAG + if is_supported_container_type(field_type): + from typing import get_args + + inner_types = get_args(field_type) + if inner_types and inner_types[0] is str: + attrs = {"sortable": sortable} + return {"name": 
field_name, "type": "tag", "attrs": attrs} + + # Default to tag for unknown types + return {"name": field_name, "type": "tag"} + + +def to_redisvl_schema(model_cls: Type[RedisModel]) -> "IndexSchema": + """ + Convert a Redis OM model to a RedisVL IndexSchema. + + This allows you to use RedisVL's advanced query capabilities with your + Redis OM models, including: + - VectorQuery with hybrid policies (BATCHES, ADHOC_BF) + - VectorRangeQuery for epsilon-based searches + - Advanced filter expressions + - EF_RUNTIME tuning for HNSW indexes + + Args: + model_cls: A HashModel or JsonModel class with index=True + + Returns: + A RedisVL IndexSchema that can be used with SearchIndex + + Raises: + ValueError: If the model is not indexed + + Example: + schema = to_redisvl_schema(MyModel) + index = SearchIndex(schema=schema, redis_client=redis) + results = await index.query(VectorQuery(...)) + """ + # Check if model is indexed + # model_config is a dict in Pydantic v2 + model_config = getattr(model_cls, "model_config", {}) + if isinstance(model_config, dict): + is_indexed = model_config.get("index", False) + else: + is_indexed = False + if not is_indexed: + raise ValueError( + f"Model {model_cls.__name__} is not indexed. " + "Use 'class MyModel(JsonModel, index=True):' to enable indexing." 
+ ) + + # Determine storage type + is_json = issubclass(model_cls, JsonModel) + storage_type = "json" if is_json else "hash" + + # Get index name and prefix + index_name = model_cls.Meta.index_name + key_prefix = model_cls.make_key("") + + # Build field definitions + fields: List[Dict[str, Any]] = [] + + for name, field in model_cls.model_fields.items(): + field_type = get_outer_type(field) + if field_type is None: + continue + + # Get FieldInfo (may be wrapped in metadata) + if ( + not isinstance(field, FieldInfo) + and hasattr(field, "metadata") + and len(field.metadata) > 0 + and isinstance(field.metadata[0], FieldInfo) + ): + field_info = field.metadata[0] + elif isinstance(field, FieldInfo): + field_info = field + else: + continue + + field_def = _get_field_type(name, field_type, field_info, is_json) + if field_def: + fields.append(field_def) + + # Build schema dict + schema_dict = { + "index": { + "name": index_name, + "prefix": key_prefix, + "storage_type": storage_type, + }, + "fields": fields, + } + + return IndexSchema.from_dict(schema_dict) + + +def get_redisvl_index( + model_cls: Type[RedisModel], + async_client: bool = True, +) -> Union["AsyncSearchIndex", "SearchIndex"]: + """ + Get a RedisVL SearchIndex for a Redis OM model. + + This provides a ready-to-use SearchIndex connected to the model's + Redis database, enabling advanced vector search operations. + + Args: + model_cls: A HashModel or JsonModel class with index=True + async_client: If True (default), return AsyncSearchIndex. + If False, return sync SearchIndex. 
+ + Returns: + A RedisVL SearchIndex (async or sync) connected to Redis + + Raises: + ValueError: If the model is not indexed + + Example: + index = get_redisvl_index(MyModel) + results = await index.query(VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + num_results=10, + )) + """ + schema = to_redisvl_schema(model_cls) + redis_client = model_cls.db() + + if async_client: + return AsyncSearchIndex(schema=schema, redis_client=redis_client) + else: + return SearchIndex(schema=schema, redis_client=redis_client) diff --git a/docs/models.md b/docs/models.md index 24f6866c..583b7abf 100644 --- a/docs/models.md +++ b/docs/models.md @@ -352,6 +352,162 @@ redis = get_redis_connection() Migrator().run() ``` +## Vector Fields + +Redis OM supports vector fields for similarity search, enabling AI and machine learning applications. Vector fields store embeddings (arrays of floats) and can be searched using K-Nearest Neighbors (KNN) queries. + +### Defining Vector Fields + +Use `VectorFieldOptions` to configure vector fields: + +```python +from redis_om import JsonModel, Field, VectorFieldOptions + +class Document(JsonModel, index=True): + title: str = Field(index=True) + content: str = Field(full_text_search=True) + embedding: list[float] = Field( + vector_options=VectorFieldOptions.flat( + type=VectorFieldOptions.TYPE.FLOAT32, + dimension=384, # Must match your embedding model's output + distance_metric=VectorFieldOptions.DISTANCE_METRIC.COSINE, + ) + ) +``` + +### Vector Algorithm Options + +Redis OM supports two vector indexing algorithms: + +**FLAT** - Brute-force search, best for smaller datasets: + +```python +vector_options = VectorFieldOptions.flat( + type=VectorFieldOptions.TYPE.FLOAT32, + dimension=768, + distance_metric=VectorFieldOptions.DISTANCE_METRIC.COSINE, + initial_cap=1000, # Optional: pre-allocate space + block_size=1000, # Optional: memory block size +) +``` + +**HNSW** - Approximate search, best for larger datasets: + +```python 
+vector_options = VectorFieldOptions.hnsw( + type=VectorFieldOptions.TYPE.FLOAT32, + dimension=768, + distance_metric=VectorFieldOptions.DISTANCE_METRIC.COSINE, + initial_cap=1000, # Optional: pre-allocate space + m=16, # Optional: max outgoing edges per node + ef_construction=200, # Optional: construction-time search width + ef_runtime=10, # Optional: query-time search width + epsilon=0.01, # Optional: relative factor for range queries +) +``` + +### Distance Metrics + +- `COSINE` - Cosine similarity (most common for text embeddings) +- `L2` - Euclidean distance +- `IP` - Inner product + +### Vector Data Types + +- `FLOAT32` - 32-bit floating point (most common) +- `FLOAT64` - 64-bit floating point + +### Querying Vector Fields + +Use `KNNExpression` to perform similarity searches: + +```python +from redis_om import KNNExpression + +# Create a query vector (from your embedding model) +query_embedding = get_embedding("search query") + +# Find the 10 most similar documents +results = await Document.find( + KNNExpression( + k=10, + vector_field_name="embedding", + reference_vector=query_embedding, + ) +).all() +``` + +### Hybrid Queries + +Combine vector search with filters. This example assumes `Document` also declares an indexed `category` field (for example `category: str = Field(index=True)`): + +```python +# Find similar documents within a category +results = await Document.find( + (Document.category == "technology") & + KNNExpression( + k=10, + vector_field_name="embedding", + reference_vector=query_embedding, + ) +).all() +``` + +### Advanced Vector Search with RedisVL + +For advanced vector search capabilities, Redis OM integrates with [RedisVL](https://github.com/redis/redis-vl-python). 
This gives you access to: + +- VectorQuery with hybrid policies (BATCHES, ADHOC_BF) +- VectorRangeQuery for epsilon-based searches +- Advanced filter expressions +- EF_RUNTIME tuning for HNSW indexes + +#### Converting Models to RedisVL Schema + +Use `to_redisvl_schema()` to convert your Redis OM model to a RedisVL `IndexSchema`: + +```python +from aredis_om.redisvl import to_redisvl_schema +from redisvl.index import SearchIndex + +# Convert your model to a RedisVL schema +schema = to_redisvl_schema(Document) + +# Use with RedisVL's SearchIndex +index = SearchIndex(schema=schema, redis_client=redis) +``` + +#### Getting a Ready-to-Use SearchIndex + +Use `get_redisvl_index()` to get a RedisVL index connected to your model's database. It returns an `AsyncSearchIndex` by default; pass `async_client=False` to get a synchronous `SearchIndex` instead: + +```python +from aredis_om.redisvl import get_redisvl_index +from redisvl.query import VectorQuery + +# Get a RedisVL index for your model +index = get_redisvl_index(Document) + +# Use RedisVL's advanced query features +results = await index.query(VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + num_results=10, + return_fields=["title", "content"], +)) +``` + +#### When to Use RedisVL Integration + +Use the RedisVL integration when you need: + +- **Hybrid search policies**: Control how filters and vector search interact +- **Range queries**: Find all vectors within a distance threshold +- **Runtime tuning**: Adjust HNSW `ef_runtime` per query +- **Advanced filters**: Complex filter expressions beyond Redis OM's query DSL + +For most use cases, Redis OM's built-in `KNNExpression` is sufficient. The RedisVL integration is an escape hatch for advanced scenarios. + ## Field Projection Redis OM supports field projection, which allows you to retrieve only specific fields from your models rather than loading all fields. This can improve performance and reduce memory usage, especially for models with many fields. 
diff --git a/docs/release_notes_1.0.md b/docs/release_notes_1.0.md index 0010cfff..2f8bea4d 100644 --- a/docs/release_notes_1.0.md +++ b/docs/release_notes_1.0.md @@ -44,6 +44,20 @@ Return only specific fields from queries: names = await User.find().only("name", "email").all() ``` +### RedisVL Integration +Redis OM now includes [RedisVL](https://github.com/redis/redis-vl-python) as a dependency, providing advanced vector search capabilities: +```python +from aredis_om.redisvl import get_redisvl_index +from redisvl.query import VectorQuery + +index = get_redisvl_index(MyModel) +results = await index.query(VectorQuery( + vector=query_embedding, + vector_field_name="embedding", + num_results=10, +)) +``` + ### py.typed Marker Full mypy compatibility with PEP 561 py.typed marker. diff --git a/make_sync.py b/make_sync.py index 9d74972a..168910f2 100644 --- a/make_sync.py +++ b/make_sync.py @@ -15,6 +15,8 @@ "pytest_asyncio": "pytest", "py_test_mark_asyncio": "py_test_mark_sync", "AsyncMock": "Mock", + # RedisVL uses SearchIndex for sync, not SyncSearchIndex + "AsyncSearchIndex": "SearchIndex", } diff --git a/pyproject.toml b/pyproject.toml index db9cbc88..225c4294 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "more-itertools>=8.14,<11.0", "setuptools>=70.0", "pydantic-extra-types>=2.10.5", + "redisvl>=0.13.0", ] [project.urls] diff --git a/tests/test_redisvl_integration.py b/tests/test_redisvl_integration.py new file mode 100644 index 00000000..c61ca60e --- /dev/null +++ b/tests/test_redisvl_integration.py @@ -0,0 +1,156 @@ +# type: ignore +"""Tests for RedisVL integration module.""" + +import abc +from typing import Optional + +import pytest +import pytest_asyncio + +from aredis_om import Field, HashModel, JsonModel, Migrator, VectorFieldOptions +from aredis_om.redisvl import get_redisvl_index, to_redisvl_schema +from redisvl.index import AsyncSearchIndex, SearchIndex +from redisvl.schema import IndexSchema + +# We need to run this 
check as sync code (during tests) even in async mode +# because we call it in the top-level module scope. +from redis_om import has_redis_json + +from .conftest import py_test_mark_asyncio + +if not has_redis_json(): + pytestmark = pytest.mark.skip + + +DIMENSIONS = 128 + + +@pytest_asyncio.fixture +async def json_model_with_vector(key_prefix, redis): + """JsonModel with vector field for testing.""" + + class BaseJsonModel(JsonModel, abc.ABC): + class Meta: + global_key_prefix = key_prefix + database = redis + + vector_options = VectorFieldOptions.flat( + type=VectorFieldOptions.TYPE.FLOAT32, + dimension=DIMENSIONS, + distance_metric=VectorFieldOptions.DISTANCE_METRIC.COSINE, + ) + + class Document(BaseJsonModel, index=True): + title: str = Field(index=True) + category: str = Field(index=True) + content: str = Field(full_text_search=True) + views: int = Field(index=True, sortable=True) + embedding: list[float] = Field([], vector_options=vector_options) + + await Migrator(conn=redis).run() + + return Document + + +@pytest_asyncio.fixture +async def hash_model_indexed(key_prefix, redis): + """HashModel with indexed fields for testing.""" + + class BaseHashModel(HashModel, abc.ABC): + class Meta: + global_key_prefix = key_prefix + database = redis + + class Product(BaseHashModel, index=True): + name: str = Field(index=True) + description: str = Field(full_text_search=True) + price: float = Field(index=True, sortable=True) + in_stock: bool = Field(index=True) + + await Migrator(conn=redis).run() + + return Product + + +@pytest_asyncio.fixture +async def non_indexed_model(key_prefix, redis): + """Model without index=True for testing error cases.""" + + class BaseJsonModel(JsonModel, abc.ABC): + class Meta: + global_key_prefix = key_prefix + database = redis + + class SimpleModel(BaseJsonModel): + name: str + + return SimpleModel + + +@py_test_mark_asyncio +async def test_to_redisvl_schema_json_model(json_model_with_vector): + """Test converting a JsonModel to RedisVL 
schema.""" + Document = json_model_with_vector + + schema = to_redisvl_schema(Document) + + assert isinstance(schema, IndexSchema) + assert schema.index.name == Document.Meta.index_name + assert schema.index.storage_type.value == "json" + + # Check fields are present + field_names = list(schema.fields.keys()) + assert "title" in field_names + assert "category" in field_names + assert "content" in field_names + assert "views" in field_names + assert "embedding" in field_names + + +@py_test_mark_asyncio +async def test_to_redisvl_schema_hash_model(hash_model_indexed): + """Test converting a HashModel to RedisVL schema.""" + Product = hash_model_indexed + + schema = to_redisvl_schema(Product) + + assert isinstance(schema, IndexSchema) + assert schema.index.storage_type.value == "hash" + + field_names = list(schema.fields.keys()) + assert "name" in field_names + assert "description" in field_names + assert "price" in field_names + assert "in_stock" in field_names + + +@py_test_mark_asyncio +async def test_to_redisvl_schema_non_indexed_raises(non_indexed_model): + """Test that non-indexed models raise ValueError.""" + SimpleModel = non_indexed_model + + with pytest.raises(ValueError, match="is not indexed"): + to_redisvl_schema(SimpleModel) + + +@py_test_mark_asyncio +async def test_get_redisvl_index_async(json_model_with_vector): + """Test getting an async RedisVL index.""" + Document = json_model_with_vector + + index = get_redisvl_index(Document, async_client=True) + + assert isinstance(index, AsyncSearchIndex) + assert index.schema.index.name == Document.Meta.index_name + + +@py_test_mark_asyncio +async def test_get_redisvl_index_sync(json_model_with_vector): + """Test getting a sync RedisVL index.""" + Document = json_model_with_vector + + index = get_redisvl_index(Document, async_client=False) + + assert isinstance(index, SearchIndex) + assert index.schema.index.name == Document.Meta.index_name +