16 changes: 8 additions & 8 deletions Makefile
@@ -8,14 +8,14 @@ install: ## Install the project and all dependencies
@echo "🚀 Installing project dependencies with uv"
uv sync --all-extras

redis-start: ## Start Redis Stack in Docker
@echo "🐳 Starting Redis Stack"
docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest

redis-stop: ## Stop Redis Stack Docker container
@echo "🛑 Stopping Redis Stack"
docker stop redis-stack || true
docker rm redis-stack || true
redis-start: ## Start Redis in Docker
@echo "🐳 Starting Redis"
docker run -d --name redis -p 6379:6379 redis:latest
Collaborator comment:
Since redis:latest will only ever start the version of Redis that was latest when you downloaded the image (and not necessarily the latest Redis), it might be worth having the Makefile command print out the current version of Redis that was started.

Suggested change (keep the `docker run` line and add a version printout):

```makefile
docker run -d --name redis -p 6379:6379 redis:latest
	@docker exec -it redis redis-cli INFO server | grep redis_version
```

redis-stop: ## Stop Redis Docker container
@echo "🛑 Stopping Redis"
docker stop redis || true
docker rm redis || true

format: ## Format code with isort and black
@echo "🎨 Formatting code"
8 changes: 4 additions & 4 deletions README.md
@@ -64,15 +64,15 @@ Choose from multiple Redis deployment options:
</details>

<details>
<summary><b>Redis Stack</b> - Docker image for development</summary>
<summary><b>Docker</b> - Local development</summary>

Run Redis Stack locally using Docker:
Run Redis locally using Docker:

```bash
docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
docker run -d --name redis -p 6379:6379 redis:latest
```

This includes Redis with vector search capabilities and Redis Insight GUI.
This runs Redis 8+ with built-in vector search capabilities.

</details>

5 changes: 5 additions & 0 deletions docs/_static/css/custom.css
@@ -1,3 +1,8 @@
.logo__image {
transform: scale(.7);
}

/* Show the primary sidebar on the landing page (override hide-on-wide) */
.bd-sidebar-primary.hide-on-wide {
display: flex !important;
}
39 changes: 34 additions & 5 deletions docs/_static/css/sidebar.css
@@ -1,23 +1,52 @@
/* Home page sidebar styling */
.custom-home-sidebar .bd-links__title {
font-size: 0.95rem;
font-weight: 600;
letter-spacing: 0.02em;
margin-bottom: 0.75rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid var(--pst-color-border);
}

/* Sidebar navigation links */
.custom-home-sidebar .toctree-l1 > a {
font-weight: 500;
padding: 0.4rem 0.5rem;
border-radius: 4px;
transition: background-color 0.15s ease;
}

.custom-home-sidebar .toctree-l1 > a:hover {
background-color: var(--pst-color-surface);
}

/* Nested items */
.custom-home-sidebar .toctree-l2 > a {
font-size: 0.875rem;
color: var(--pst-color-text-muted);
}

.custom-home-sidebar .toctree-l2 > a:hover {
color: var(--pst-color-text-base);
}

/* Legacy custom_sidebar class */
.custom_sidebar {
width: auto;
background-color: inherit;

}


/* Style the sidebar links */
.custom_sidebar ul {
list-style-type: none;
padding: 6px;
}

/* Style the sidebar links */
.custom_sidebar li {
list-style-type: none;
padding: 5px;
}

.custom_sidebar a {
text-decoration: none; /* Removes underline */
text-decoration: none;
}

80 changes: 80 additions & 0 deletions docs/_static/redisvl-architecture.svg
(New file: redisvl-architecture.svg, the RedisVL architecture diagram; not rendered in the diff view.)
6 changes: 6 additions & 0 deletions docs/_templates/sidebar-nav-custom.html
@@ -0,0 +1,6 @@
{# Custom sidebar navigation for the landing page #}
<div class="bd-toc-item navbar-nav custom-home-sidebar">
<p class="bd-links__title" role="heading" aria-level="1">📚 Docs</p>
{{ toctree(maxdepth=2, collapse=False, includehidden=True, titles_only=True) }}
</div>

1 change: 0 additions & 1 deletion docs/api/index.md
@@ -20,7 +20,6 @@ query
filter
vectorizer
reranker
utils
cache
message_history
router
4 changes: 2 additions & 2 deletions docs/api/query.rst
@@ -106,7 +106,7 @@ VectorRangeQuery
)

AggregateHybridQuery
================
====================
Collaborator comment:
Do these actually render any differently? At least in the GitHub preview, it all turns into a regular horizontal line.



.. currentmodule:: redisvl.query
@@ -221,7 +221,7 @@ CountQuery


MultiVectorQuery
==========
================

.. currentmodule:: redisvl.query

4 changes: 2 additions & 2 deletions docs/api/vector.rst
@@ -1,7 +1,7 @@

*****
******
Vector
*****
******

The Vector class in RedisVL is a container that encapsulates a numerical vector, its datatype, corresponding index field name, and optional importance weight. It is used when constructing multi-vector queries using the MultiVectorQuery class.

71 changes: 71 additions & 0 deletions docs/concepts/architecture.md
@@ -0,0 +1,71 @@
---
myst:
html_meta:
"description lang=en": |
RedisVL architecture - how the library structures vector search on Redis.
---

# Architecture

RedisVL sits between your application and Redis, providing a structured way to define, populate, and query vector search indexes.

```{image} /_static/redisvl-architecture.svg
:alt: RedisVL Architecture
:align: center
:width: 100%
```

## The Core Pattern

Every RedisVL application follows a consistent workflow: **define → create → load → query**.

First, you define a **schema** that describes your data. The schema specifies which fields exist, what types they are, and how they should be indexed. This includes declaring vector fields with their dimensionality and the algorithm Redis should use for similarity search.

Next, you create an **index** in Redis based on that schema. The index is a persistent structure that Redis uses to make searches fast. Creating an index tells Redis how to organize and access your data.

Then you **load** your data. Documents are stored as Redis Hash or JSON objects. As documents are written, Redis automatically indexes them according to your schema—no separate indexing step required.

Finally, you **query** the index. RedisVL provides query builders that construct Redis search commands for you. You can search by vector similarity, filter by metadata, combine multiple criteria, or mix full-text search with semantic search.

This pattern applies whether you're building a simple semantic search or a complex multi-modal retrieval system.
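
A minimal sketch of the full cycle, assuming a recent RedisVL release and a local Redis on the default port (the index name, fields, and toy 3-dimensional vectors are illustrative):

```python
from redisvl.index import SearchIndex
from redisvl.query import VectorQuery

# Define: one text field plus a toy 3-dim vector field (JSON storage)
schema = {
    "index": {"name": "docs", "prefix": "doc", "storage_type": "json"},
    "fields": [
        {"name": "content", "type": "text"},
        {"name": "embedding", "type": "vector",
         "attrs": {"dims": 3, "algorithm": "flat",
                   "distance_metric": "cosine", "datatype": "float32"}},
    ],
}

# Create: build the index in Redis from the schema
index = SearchIndex.from_dict(schema, redis_url="redis://localhost:6379")
index.create(overwrite=True)

# Load: documents are indexed automatically as they are written
index.load([{"content": "hello world", "embedding": [0.1, 0.2, 0.3]}])

# Query: return the nearest neighbor to a query vector
query = VectorQuery(vector=[0.1, 0.2, 0.3],
                    vector_field_name="embedding", num_results=1)
results = index.query(query)
```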

## Schemas as Contracts

The schema is the source of truth for your index. It defines the contract between your data and Redis.

A schema includes the index name, a key prefix (so Redis knows which keys belong to this index), the storage type (Hash or JSON), and a list of field definitions. Each field has a name, a type, and type-specific configuration.

For vector fields, you specify the dimensionality (which must match your embedding model's output), the distance metric (cosine, Euclidean, or inner product), and the indexing algorithm. These choices are locked in when the index is created—changing them requires building a new index.

The schema can be defined programmatically in Python or loaded from a YAML file. YAML schemas are useful for version control, sharing between environments, and keeping configuration separate from code.
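
As a hypothetical example, a YAML schema for a product index might look like this (names, prefix, and dimensions are placeholders); a file like this could then be loaded with `SearchIndex.from_yaml("schema.yaml")`:

```yaml
version: '0.1.0'

index:
  name: products
  prefix: product
  storage_type: json

fields:
  - name: category
    type: tag
  - name: price
    type: numeric
  - name: embedding
    type: vector
    attrs:
      dims: 384               # must match the embedding model's output
      distance_metric: cosine
      algorithm: hnsw
      datatype: float32
```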

## Query Composition

RedisVL's query builders let you compose search operations without writing raw Redis commands.

**Vector queries** find the K most similar items to a query vector. You provide an embedding, and Redis returns the nearest neighbors according to your configured distance metric.

**Range queries** find all vectors within a distance threshold. Instead of asking for the top K, you're asking for everything "close enough" to a query point.

**Filter queries** narrow results by metadata. You can filter on text fields, tags, numeric ranges, and geographic areas. Filters apply before the vector search, reducing the candidate set.

**Hybrid queries** combine keyword search with semantic search. This is useful when you want to match on specific terms while also considering semantic relevance.

These query types can be combined. A typical pattern is vector search with metadata filters—for example, finding similar products but only in a specific category or price range.
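
A sketch of that pattern, reusing the index from the earlier example; `query_embedding` stands in for the embedding of the user's query text:

```python
from redisvl.query import VectorQuery
from redisvl.query.filter import Num, Tag

# Metadata filters narrow the candidate set before the KNN search runs
flt = (Tag("category") == "electronics") & (Num("price") < 500)

query = VectorQuery(
    vector=query_embedding,
    vector_field_name="embedding",
    filter_expression=flt,
    num_results=5,
)
results = index.query(query)
```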

## Extensions as Patterns

Extensions are higher-level abstractions built on RedisVL's core primitives. Each extension encapsulates a common AI workflow pattern.

**Semantic caching** stores LLM responses and retrieves them when similar prompts are seen again. This reduces API costs and latency without requiring exact-match caching.

**Message history** stores conversation turns and retrieves context for LLM prompts. It can retrieve by recency (most recent messages) or by relevance (semantically similar messages).

**Semantic routing** classifies queries into predefined categories based on similarity to reference phrases. This enables intent detection, topic routing, and guardrails.

Each extension manages its own Redis index internally. You interact with a clean, purpose-specific API rather than managing schemas and queries yourself.

---

**Learn more:** {doc}`/user_guide/01_getting_started` covers the core workflow. {doc}`extensions` explains each extension pattern in detail.

89 changes: 89 additions & 0 deletions docs/concepts/extensions.md
@@ -0,0 +1,89 @@
---
myst:
html_meta:
"description lang=en": |
RedisVL extensions - semantic caching, embeddings caching, message history, and routing.
---

# Extensions

Extensions are opinionated, higher-level abstractions built on RedisVL's core primitives. Each extension encapsulates a common AI application pattern, managing its own Redis index internally and exposing a clean, purpose-specific API.

You don't need to understand schemas, indexes, or queries to use extensions—they handle that complexity for you.

## Semantic Cache

LLM API calls are expensive and slow. If users ask similar questions, you're paying to generate similar answers repeatedly. Semantic caching solves this by storing responses and returning cached answers when similar prompts are seen again.

### How It Works

When a prompt arrives, the cache embeds it and searches for similar cached prompts. If a match is found within the configured distance threshold, the cached response is returned immediately—no LLM call needed. If no match is found, you call the LLM, store the prompt-response pair, and return the response.

The key insight is "similar" rather than "identical." Traditional caching requires exact matches. Semantic caching matches by meaning, so "What's the capital of France?" and "Tell me France's capital city" can hit the same cache entry.
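
A minimal sketch of that loop (the import path reflects recent RedisVL releases, and `call_llm` is a hypothetical helper):

```python
from redisvl.extensions.cache.llm import SemanticCache

cache = SemanticCache(
    name="llmcache",
    redis_url="redis://localhost:6379",
    distance_threshold=0.1,  # how close a prompt must be to count as a hit
)

prompt = "What's the capital of France?"
if hits := cache.check(prompt=prompt, num_results=1):
    response = hits[0]["response"]  # cache hit: skip the LLM entirely
else:
    response = call_llm(prompt)     # hypothetical LLM call
    cache.store(prompt=prompt, response=response)
```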

### Threshold Tuning

The distance threshold controls how similar prompts must be to match. A strict threshold (low value, like 0.05) requires near-identical prompts. A loose threshold (higher value, like 0.3) matches more liberally.

Too strict, and you miss valid cache hits. Too loose, and you return wrong answers for different questions. Start strict, monitor cache quality in production, and loosen gradually based on observed behavior.

### Multi-Tenant Isolation

In applications serving multiple users or contexts, you often want separate cache spaces. Filters let you scope cache lookups—for example, caching per-user or per-conversation so one user's cached answers don't leak to another.
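
A sketch of per-user scoping, assuming the filterable-fields support in recent RedisVL releases (`prompt`, `response`, and the `user_id` field are illustrative):

```python
from redisvl.extensions.cache.llm import SemanticCache
from redisvl.query.filter import Tag

# Declare which metadata fields lookups may filter on
cache = SemanticCache(
    name="llmcache",
    filterable_fields=[{"name": "user_id", "type": "tag"}],
)

# Store entries tagged per user...
cache.store(prompt, response, filters={"user_id": "alice"})

# ...and scope lookups so another user's entries never match
hits = cache.check(prompt, filter_expression=Tag("user_id") == "alice")
```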

## Embeddings Cache

Embedding APIs have per-token costs, and computing the same embedding repeatedly wastes money. The embeddings cache stores computed embeddings and returns them on subsequent requests for the same content.

### How It Works

Unlike semantic cache (which uses similarity search), embeddings cache uses exact key matching. A deterministic hash is computed from the input text and model name. If that hash exists in the cache, the stored embedding is returned. If not, the embedding is computed, stored, and returned.

This is useful when the same content is embedded multiple times—common in applications where users submit similar queries, or where documents are re-processed periodically.
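
A minimal sketch of the lookup, assuming recent RedisVL releases (`embed` is a hypothetical embedding call and the model name is a placeholder):

```python
from redisvl.extensions.cache.embeddings import EmbeddingsCache

cache = EmbeddingsCache(name="embedcache", redis_url="redis://localhost:6379")

text, model = "What is semantic search?", "my-embedding-model"
if entry := cache.get(text=text, model_name=model):
    vector = entry["embedding"]  # exact hit keyed on hash(text, model)
else:
    vector = embed(text)         # hypothetical embedding call
    cache.set(text=text, model_name=model, embedding=vector)
```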

### Wrapping Vectorizers

The embeddings cache can wrap any vectorizer, adding transparent caching. Calling the wrapped vectorizer checks the cache first. This requires no changes to your embedding code—just wrap the vectorizer and caching happens automatically.
Collaborator comment (on lines +44 to +46):
I'm not sure if there's a clear sequence in which a reader encounters concepts, so it's not clear if the vectorizer concept would be clear when a reader encounters this section (or what their specific understanding of "calling a vectorizer" would be). The simplest treatment might just be to link to the doc page about the vectorizers from here.


## Message History

LLMs are stateless. To have a conversation, you must include previous messages in each prompt. Message history manages this context, storing conversation turns and retrieving them when building prompts.

### Storage Model

Each message includes a role (user, assistant, system, or tool), the message content, a timestamp, and a session identifier. The session tag groups messages into conversations—you might have one session per user, per chat thread, or per agent instance.

### Retrieval Strategies

The simplest retrieval is by recency: get the N most recent messages. This works for short conversations but breaks down when context exceeds the LLM's token limit or when relevant information appeared earlier in a long conversation.

Semantic message history adds vector search. Messages are embedded, and you can retrieve by relevance rather than recency. This is powerful for long conversations where the user might reference something said much earlier, or for agents that need to recall specific instructions from their setup.
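
A sketch of both strategies, assuming a recent RedisVL release with a default vectorizer available (session tag and messages are illustrative):

```python
from redisvl.extensions.message_history import SemanticMessageHistory

history = SemanticMessageHistory(name="chat", session_tag="user-123")

history.add_message({"role": "user", "content": "My order number is 4417."})
history.add_message({"role": "assistant", "content": "Got it, order 4417."})

recent = history.get_recent(top_k=5)                          # by recency
relevant = history.get_relevant("what was my order number?")  # by similarity
```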

### Session Isolation

Session tags are critical for multi-user applications. Each user's conversation should be isolated, so retrieving context for User A doesn't include messages from User B. The session tag provides this isolation, and you can structure sessions however makes sense—per-user, per-thread, per-agent, or any other grouping.

## Semantic Router

Semantic routing classifies queries into predefined categories based on meaning. It's a lightweight alternative to classification models, useful for intent detection, topic routing, and guardrails.

### How It Works

You define routes, each with a name and a set of reference phrases that represent that category. The router embeds all references and indexes them. At runtime, an incoming query is embedded and compared against all route references. The route whose references are closest to the query wins—if it's within the configured distance threshold.

For example, a customer support router might have routes for "billing," "technical support," and "account management," each with 5-10 reference phrases. When a user asks "I can't log into my account," the router matches it to the "account management" route based on semantic similarity to that route's references.
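
That example as a sketch (route names, references, and thresholds are illustrative):

```python
from redisvl.extensions.router import Route, SemanticRouter

routes = [
    Route(name="account_management",
          references=["I can't log in", "reset my password",
                      "change my email address"],
          distance_threshold=0.3),
    Route(name="billing",
          references=["update my credit card", "why was I charged twice?"],
          distance_threshold=0.3),
]

router = SemanticRouter(name="support-router", routes=routes)

match = router("I can't log into my account")
print(match.name)  # "account_management", or None if nothing was close enough
```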

### Threshold and Aggregation

Each route has its own distance threshold, controlling how close queries must be to match. Routes can also specify how to aggregate distances when multiple references match—taking the average or minimum distance.

If no route matches (all distances exceed their thresholds), the router returns no match. This lets you handle out-of-scope queries gracefully rather than forcing a classification.

### Use Cases

Semantic routing is useful for intent classification (determining what a user wants), topic detection (categorizing content), guardrails (detecting and blocking certain query types), and agent dispatch (sending queries to specialized sub-agents).

---

**Learn more:** {doc}`/user_guide/03_llmcache` covers semantic caching. {doc}`/user_guide/07_message_history` explains conversation management. {doc}`/user_guide/08_semantic_router` walks through routing setup.
