From d40e0469bc6a985b1bb226e4171f215c50ba88b6 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 11:05:29 +0100 Subject: [PATCH 01/11] wip: docker dev --- docker/cc-vec-bot.docker | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 docker/cc-vec-bot.docker diff --git a/docker/cc-vec-bot.docker b/docker/cc-vec-bot.docker new file mode 100644 index 0000000..b4e5f86 --- /dev/null +++ b/docker/cc-vec-bot.docker @@ -0,0 +1,58 @@ +FROM llamastack/distribution-starter:0.4.1 +LABEL maintainer="damian@commoncrawl.org" + +USER root + +# Install minimal dependencies required by the Ollama install script +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl ca-certificates gnupg \ + && rm -rf /var/lib/apt/lists/* + +# Install Ollama +RUN curl -fsSL https://ollama.ai/install.sh | sh + +ENV PATH="/usr/local/bin:${PATH}" + +# Default model to pull on startup (tinyllama ~637MB, smallest practical LLM) +# For even smaller (embeddings only): all-minilm (~45MB) +# For production: llama3.2:3b (~2GB) +ENV OLLAMA_MODEL="tinyllama" + +# Create entrypoint script that starts ollama and pulls model on first run +RUN cat <<'EOF' > /entrypoint.sh +#!/bin/bash +set -e + +# Start ollama server in background +ollama serve & +OLLAMA_PID=$! + +# Wait for ollama to be ready +echo "Waiting for Ollama to start..." +for i in {1..30}; do + if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then + echo "Ollama is ready" + break + fi + sleep 1 +done + +# Pull model if not already present +if [ -n "$OLLAMA_MODEL" ]; then + echo "Ensuring model $OLLAMA_MODEL is available..." + ollama pull "$OLLAMA_MODEL" +fi + +# If a command was passed, run it; otherwise wait on ollama +if [ $# -gt 0 ]; then + exec "$@" +else + wait $OLLAMA_PID +fi +EOF +RUN chmod +x /entrypoint.sh + +EXPOSE 11434 + +ENTRYPOINT ["/entrypoint.sh"] +CMD [] From df9533801dbd3ee0935579d8b504cc89afb66c4c Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 11:47:00 +0100 Subject: [PATCH 02/11] wip: docker image running --- README.md | 15 +++ docker/Dockerfile.cc-vec-bot | 177 +++++++++++++++++++++++++++++++++++ docker/cc-vec-bot.docker | 58 ------------ 3 files changed, 192 insertions(+), 58 deletions(-) create mode 100644 docker/Dockerfile.cc-vec-bot delete mode 100644 docker/cc-vec-bot.docker diff --git a/README.md b/README.md index ad49916..92e6956 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,21 @@ export AWS_SECRET_ACCESS_KEY=your-secret uv run cc-vec index --url-patterns "%.edu" --limit 10 ``` +damian: +# Set environment variables +export OPENAI_BASE_URL=http://localhost:8321/v1 +export OPENAI_API_KEY=none # Llama Stack doesn't require a real key +export OPENAI_EMBEDDING_MODEL=ollama/nomic-embed-text:latest +export OPENAI_EMBEDDING_DIMENSIONS=768 + +# Set your Athena credentials +export ATHENA_OUTPUT_BUCKET=s3://cc-vec-damian-01/test-results +export AWS_PROFILE=cc-volunteers +export AWS_DEFAULT_REGION=us-east-1 + +# Use cc-vec with local models +uv run cc-vec index --url-patterns "%.edu" --limit 10 + **Documentation:** - [Llama Stack Docs](https://llamastack.github.io/) - [Llama Stack GitHub](https://github.com/meta-llama/llama-stack) diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot new file mode 100644 index 0000000..7f33aa9 --- /dev/null +++ b/docker/Dockerfile.cc-vec-bot @@ -0,0 +1,177 @@ +FROM llamastack/distribution-starter:0.4.1 +LABEL maintainer="damian@commoncrawl.org" + +USER root + +# Install minimal dependencies 
required by the Ollama install script +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl ca-certificates gnupg zstd \ + && rm -rf /var/lib/apt/lists/* + +# Install Ollama (for local inference when OLLAMA_URL is not set) +RUN curl -fsSL https://ollama.ai/install.sh | sh + +ENV PATH="/usr/local/bin:${PATH}" + +# --------------------------------------------------------------------------- +# Build-time model pre-fetch (optional) +# Set PREFETCH_MODEL=1 to bake the model into the image at build time. +# This makes the image larger but faster to start. +# +# Build examples: +# docker build --build-arg PREFETCH_MODEL=1 -t cc-vec-bot . # bake tinyllama +# docker build --build-arg PREFETCH_MODEL=1 --build-arg INFERENCE_MODEL=llama3.2:3b -t cc-vec-bot . +# docker build -t cc-vec-bot . # no prefetch (default) +# --------------------------------------------------------------------------- +ARG PREFETCH_MODEL=0 +ARG INFERENCE_MODEL=tinyllama + +# Pre-fetch model at build time if PREFETCH_MODEL=1 +# Requires starting ollama serve temporarily during build +RUN if [ "$PREFETCH_MODEL" = "1" ]; then \ + echo "Pre-fetching model: ${INFERENCE_MODEL}"; \ + ollama serve & \ + OLLAMA_PID=$!; \ + sleep 5; \ + for i in 1 2 3 4 5 6 7 8 9 10; do \ + curl -s http://localhost:11434/api/tags >/dev/null 2>&1 && break; \ + sleep 2; \ + done; \ + ollama pull "${INFERENCE_MODEL}"; \ + kill $OLLAMA_PID 2>/dev/null || true; \ + echo "Model ${INFERENCE_MODEL} pre-fetched successfully"; \ + else \ + echo "Skipping model pre-fetch (PREFETCH_MODEL=0)"; \ + fi + +# --------------------------------------------------------------------------- +# Compatibility with deprecated llamastack/distribution-ollama +# Usage: +# export LLAMA_STACK_PORT=5001 +# docker run -it \ +# -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ +# -v ~/.llama:/root/.llama \ +# cc-vec-bot \ +# --port $LLAMA_STACK_PORT \ +# --env INFERENCE_MODEL=tinyllama \ +# --env OLLAMA_URL=http://host.docker.internal:11434 +# +# Or with built-in Ollama (no external Ollama needed): +# docker run -it \ +# -p 5001:5001 -p 11434:11434 \ +# -v ~/.llama:/root/.llama \ +# cc-vec-bot \ +# --port 5001 \ +# --env INFERENCE_MODEL=tinyllama +# --------------------------------------------------------------------------- + +# Default model (inherits from build ARG, can be overridden at runtime) +# tinyllama ~637MB (smallest practical LLM) +# all-minilm ~45MB (embeddings only) +# llama3.2:3b ~2GB (production) +ENV INFERENCE_MODEL=${INFERENCE_MODEL} + +# Default ports +ENV LLAMA_STACK_PORT=5001 +ENV OLLAMA_PORT=11434 + +# Create entrypoint script compatible with distribution-ollama CLI args +RUN cat <<'EOF' > /entrypoint.sh +#!/bin/bash +set -e + +# Parse --port and --env arguments (compatible with distribution-ollama) +while [[ $# -gt 0 ]]; do + case $1 in + --port) + LLAMA_STACK_PORT="$2" + shift 2 + ;; + --env) + # Parse KEY=VALUE and export it + if [[ "$2" =~ ^([^=]+)=(.*)$ ]]; then + export "${BASH_REMATCH[1]}"="${BASH_REMATCH[2]}" + fi + shift 2 + ;; + *) + # Unknown option, pass through + EXTRA_ARGS+=("$1") + shift + ;; + esac +done + +echo "==============================================" +echo "cc-vec-bot (llama-stack + ollama)" +echo "==============================================" +echo "LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001}" +echo "INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama}" +echo "OLLAMA_URL: ${OLLAMA_URL:-}" +echo "==============================================" + +# Determine Ollama URL +if [ -z "$OLLAMA_URL" ]; then + # No external Ollama URL provided 
- start local Ollama + echo "Starting local Ollama server..." + ollama serve & + OLLAMA_PID=$! + OLLAMA_URL="http://localhost:11434" + export OLLAMA_URL + + # Wait for Ollama to be ready + echo "Waiting for Ollama to start..." + for i in {1..30}; do + if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then + echo "Ollama is ready" + break + fi + if [ $i -eq 30 ]; then + echo "ERROR: Ollama failed to start" + exit 1 + fi + sleep 1 + done + + # Pull model if specified + if [ -n "$INFERENCE_MODEL" ]; then + echo "Pulling model: $INFERENCE_MODEL" + ollama pull "$INFERENCE_MODEL" + fi +else + # External Ollama URL provided - verify connectivity + echo "Using external Ollama at: $OLLAMA_URL" + for i in {1..10}; do + if curl -s "${OLLAMA_URL}/api/tags" >/dev/null 2>&1; then + echo "External Ollama is reachable" + break + fi + if [ $i -eq 10 ]; then + echo "WARNING: Cannot reach external Ollama at $OLLAMA_URL" + fi + sleep 1 + done +fi + +# Start llama-stack server +# The distribution-starter base image includes llama-stack +echo "Starting llama-stack on port ${LLAMA_STACK_PORT}..." +export OLLAMA_URL="${OLLAMA_URL}" +export INFERENCE_MODEL="${INFERENCE_MODEL}" +exec llama stack run starter \ + --port "${LLAMA_STACK_PORT}" \ + "${EXTRA_ARGS[@]}" +EOF +#--env OLLAMA_URL="${OLLAMA_URL}" \ +#--env INFERENCE_MODEL="${INFERENCE_MODEL}" \ + +RUN chmod +x /entrypoint.sh + +# Volume for llama-stack config and model cache +VOLUME ["/root/.llama"] + +# Expose both llama-stack and ollama ports +EXPOSE 5001 11434 + +ENTRYPOINT ["/entrypoint.sh"] +CMD [] diff --git a/docker/cc-vec-bot.docker b/docker/cc-vec-bot.docker deleted file mode 100644 index b4e5f86..0000000 --- a/docker/cc-vec-bot.docker +++ /dev/null @@ -1,58 +0,0 @@ -FROM llamastack/distribution-starter:0.4.1 -LABEL maintainer="damian@commoncrawl.org" - -USER root - -# Install minimal dependencies required by the Ollama install script -RUN apt-get update \ - && apt-get install -y --no-install-recommends curl ca-certificates gnupg \ - && rm -rf /var/lib/apt/lists/* - -# Install Ollama -RUN curl -fsSL https://ollama.ai/install.sh | sh - -ENV PATH="/usr/local/bin:${PATH}" - -# Default model to pull on startup (tinyllama ~637MB, smallest practical LLM) -# For even smaller (embeddings only): all-minilm (~45MB) -# For production: llama3.2:3b (~2GB) -ENV OLLAMA_MODEL="tinyllama" - -# Create entrypoint script that starts ollama and pulls model on first run -RUN cat <<'EOF' > /entrypoint.sh -#!/bin/bash -set -e - -# Start ollama server in background -ollama serve & -OLLAMA_PID=$! - -# Wait for ollama to be ready -echo "Waiting for Ollama to start..." -for i in {1..30}; do - if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then - echo "Ollama is ready" - break - fi - sleep 1 -done - -# Pull model if not already present -if [ -n "$OLLAMA_MODEL" ]; then - echo "Ensuring model $OLLAMA_MODEL is available..." 
- ollama pull "$OLLAMA_MODEL" -fi - -# If a command was passed, run it; otherwise wait on ollama -if [ $# -gt 0 ]; then - exec "$@" -else - wait $OLLAMA_PID -fi -EOF -RUN chmod +x /entrypoint.sh - -EXPOSE 11434 - -ENTRYPOINT ["/entrypoint.sh"] -CMD [] From 183fbaa1f8178f2276fee93ef228b964d5414a8e Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 12:19:21 +0100 Subject: [PATCH 03/11] wip: non-streaming works --- chatbot-frontend/README.md | 32 ++++++++++++++++++++++++ chatbot-frontend/api.py | 41 +++++++++++++++++++++++++++++++ chatbot-frontend/index.html | 38 ++++++++++++++++++++++++++++ chatbot-frontend/requirements.txt | 3 +++ docker/Dockerfile.cc-vec-bot | 21 +++++++++++----- 5 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 chatbot-frontend/README.md create mode 100644 chatbot-frontend/api.py create mode 100644 chatbot-frontend/index.html create mode 100644 chatbot-frontend/requirements.txt diff --git a/chatbot-frontend/README.md b/chatbot-frontend/README.md new file mode 100644 index 0000000..70f1e4d --- /dev/null +++ b/chatbot-frontend/README.md @@ -0,0 +1,32 @@ +# Minimal Llama Chatbot Frontend + +This directory contains a minimal web-based chatbot UI and a FastAPI backend that proxies requests to an Ollama (Llama) backend. + +## Usage + +1. **Install dependencies** + +```bash +pip install -r requirements.txt +``` + +2. **Run the backend** + +```bash +uvicorn api:app --reload +``` + +3. **Open the frontend** + +Open `index.html` in your browser (or serve it with any static file server). + +## Configuration + +- The backend expects an Ollama server running at `http://localhost:11434` by default. +- You can override the Ollama URL and model with environment variables: + - `OLLAMA_URL` (e.g. `http://localhost:11434/api/generate`) + - `INFERENCE_MODEL` (e.g. `tinyllama`) + +## Notes +- The backend is intentionally minimal and does not persist chat history. +- The frontend is pure HTML/JS, no frameworks or build step required. 
diff --git a/chatbot-frontend/api.py b/chatbot-frontend/api.py new file mode 100644 index 0000000..0bb7d7a --- /dev/null +++ b/chatbot-frontend/api.py @@ -0,0 +1,41 @@ +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse, FileResponse +import httpx +import os +import json + +app = FastAPI() +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate") +INFERENCE_MODEL = os.environ.get("INFERENCE_MODEL", "tinyllama") +STREAMING = os.environ.get("OLLAMA_STREAMING", "0") != "0" + +@app.get("/") +async def serve_index(): + return FileResponse("index.html") + +@app.post("/api/chat") +async def chat(request: Request): + data = await request.json() + prompt = data.get("message", "") + payload = {"model": INFERENCE_MODEL, "prompt": prompt, "stream": STREAMING} + async with httpx.AsyncClient() as client: + r = await client.post(OLLAMA_URL, json=payload, timeout=None) + r.raise_for_status() + if STREAMING: + # Stream JSON lines to the frontend as a single event stream + async def event_stream(): + async for line in r.aiter_lines(): + line = line.strip() + print(line) + if not line: + continue + try: + obj = json.loads(line) + # Only send the 'response' field + yield f"data: {json.dumps({'response': obj.get('response', '')})}\n\n" + except Exception: + continue + return StreamingResponse(event_stream(), media_type="text/event-stream") + else: + result = r.json() + return JSONResponse({"response": result.get("response", "")}) diff --git a/chatbot-frontend/index.html b/chatbot-frontend/index.html new file mode 100644 index 0000000..a931bf9 --- /dev/null +++ b/chatbot-frontend/index.html @@ -0,0 +1,38 @@ + + + + Llama Chatbot + + + +
Llama Chatbot
+
+ + + + + diff --git a/chatbot-frontend/requirements.txt b/chatbot-frontend/requirements.txt new file mode 100644 index 0000000..c937678 --- /dev/null +++ b/chatbot-frontend/requirements.txt @@ -0,0 +1,3 @@ +fastapi +httpx +uvicorn diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot index 7f33aa9..16a7131 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/docker/Dockerfile.cc-vec-bot @@ -5,7 +5,7 @@ USER root # Install minimal dependencies required by the Ollama install script RUN apt-get update \ - && apt-get install -y --no-install-recommends curl ca-certificates gnupg zstd \ + && apt-get install -y --no-install-recommends curl ca-certificates gnupg \ && rm -rf /var/lib/apt/lists/* # Install Ollama (for local inference when OLLAMA_URL is not set) @@ -75,6 +75,17 @@ ENV INFERENCE_MODEL=${INFERENCE_MODEL} ENV LLAMA_STACK_PORT=5001 ENV OLLAMA_PORT=11434 + +# --------------------------------------------------------------------------- +# Install chatbot-frontend +# --------------------------------------------------------------------------- + + + +# --------------------------------------------------------------------------- +# Construct entrypoint script +# --------------------------------------------------------------------------- + # Create entrypoint script compatible with distribution-ollama CLI args RUN cat <<'EOF' > /entrypoint.sh #!/bin/bash @@ -156,14 +167,12 @@ fi # Start llama-stack server # The distribution-starter base image includes llama-stack echo "Starting llama-stack on port ${LLAMA_STACK_PORT}..." -export OLLAMA_URL="${OLLAMA_URL}" -export INFERENCE_MODEL="${INFERENCE_MODEL}" -exec llama stack run starter \ +exec llama stack run /root/.llama/distributions/ollama/run.yaml \ --port "${LLAMA_STACK_PORT}" \ + --env OLLAMA_URL="${OLLAMA_URL}" \ + --env INFERENCE_MODEL="${INFERENCE_MODEL}" \ "${EXTRA_ARGS[@]}" EOF -#--env OLLAMA_URL="${OLLAMA_URL}" \ -#--env INFERENCE_MODEL="${INFERENCE_MODEL}" \ RUN chmod +x /entrypoint.sh From 0b6213abdbd96bb346eafeba5d9ffa6ddee41444 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 12:38:51 +0100 Subject: [PATCH 04/11] wip: chatbot works inside docker container --- chatbot-frontend/api.py | 1 + chatbot-frontend/index.html | 3 ++- docker/Dockerfile.cc-vec-bot | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/chatbot-frontend/api.py b/chatbot-frontend/api.py index 0bb7d7a..755a753 100644 --- a/chatbot-frontend/api.py +++ b/chatbot-frontend/api.py @@ -11,6 +11,7 @@ @app.get("/") async def serve_index(): + print("serving index.html") return FileResponse("index.html") @app.post("/api/chat") diff --git a/chatbot-frontend/index.html b/chatbot-frontend/index.html index a931bf9..282f323 100644 --- a/chatbot-frontend/index.html +++ b/chatbot-frontend/index.html @@ -9,7 +9,8 @@ -
Llama Chatbot
+ Creative Commons Chatbot
+ powered by Llama Stack
diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot index 16a7131..d6a71a6 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/docker/Dockerfile.cc-vec-bot @@ -5,7 +5,7 @@ USER root # Install minimal dependencies required by the Ollama install script RUN apt-get update \ - && apt-get install -y --no-install-recommends curl ca-certificates gnupg \ + && apt-get install -y --no-install-recommends curl ca-certificates gnupg zstd \ && rm -rf /var/lib/apt/lists/* # Install Ollama (for local inference when OLLAMA_URL is not set) @@ -74,13 +74,18 @@ ENV INFERENCE_MODEL=${INFERENCE_MODEL} # Default ports ENV LLAMA_STACK_PORT=5001 ENV OLLAMA_PORT=11434 - +ENV CHATBOT_PORT=8008 # --------------------------------------------------------------------------- # Install chatbot-frontend # --------------------------------------------------------------------------- - +WORKDIR /opt/chatbot-frontend +COPY chatbot-frontend . +RUN apt-get update && apt-get install -y python3-pip && \ + pip3 install --no-cache-dir -r requirements.txt && \ + apt-get remove -y python3-pip && apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* # --------------------------------------------------------------------------- # Construct entrypoint script @@ -164,13 +169,20 @@ else done fi +# Start the chatbot in the background +echo "Starting chatbot-frontend..." +(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port ${CHATBOT_PORT:-8000} &) +#(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port ${CHATBOT_PORT:-8000}) +CHATBOT_PID=$! +echo "Chatbot-frontend started with PID ${CHATBOT_PID}" + # Start llama-stack server # The distribution-starter base image includes llama-stack echo "Starting llama-stack on port ${LLAMA_STACK_PORT}..." -exec llama stack run /root/.llama/distributions/ollama/run.yaml \ +export OLLAMA_URL="${OLLAMA_URL}" +export INFERENCE_MODEL="${INFERENCE_MODEL}" +exec llama stack run starter \ --port "${LLAMA_STACK_PORT}" \ - --env OLLAMA_URL="${OLLAMA_URL}" \ - --env INFERENCE_MODEL="${INFERENCE_MODEL}" \ "${EXTRA_ARGS[@]}" EOF From 5264bd2e0bbf271eba262471293d4062d915685b Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 13:38:11 +0100 Subject: [PATCH 05/11] feat: working chatbot --- chatbot-frontend/api.py | 2 +- docker/.env.sample | 24 ++++++++++++++++++++++++ docker/.gitignore | 1 + docker/docker-compose.yaml | 21 +++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 docker/.env.sample create mode 100644 docker/.gitignore create mode 100644 docker/docker-compose.yaml diff --git a/chatbot-frontend/api.py b/chatbot-frontend/api.py index 755a753..992af70 100644 --- a/chatbot-frontend/api.py +++ b/chatbot-frontend/api.py @@ -5,7 +5,7 @@ import json app = FastAPI() -OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate") +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434") + "/api/generate" INFERENCE_MODEL = os.environ.get("INFERENCE_MODEL", "tinyllama") STREAMING = os.environ.get("OLLAMA_STREAMING", "0") != "0" diff --git a/docker/.env.sample b/docker/.env.sample new file mode 100644 index 0000000..8ce777f --- /dev/null +++ b/docker/.env.sample @@ -0,0 +1,24 @@ +# Docker Compose Configuration for cc-vec-bot +# Copy this file to .env and customize values as needed + +# Run: docker compose up --build + +# Inference model to use (tinyllama, llama2, llama3.2:3b, etc.) 
+# recommended: tinyllama for local testing (700MB), llama3.2:3B for production +INFERENCE_MODEL=tinyllama + +# LLM model files are large. +# Set to 1 to pre-fetch the model at build time (increases image size and build time) +# Set to 0 to fetch when the image is run (smaller, faster build but redundant runtime fetch of large models) +PREFETCH_MODEL=1 + +# Ports +LLAMA_STACK_PORT=5001 +CHATBOT_PORT=8008 +OLLAMA_PORT=11434 + +# External ollama/chromadb URL +# Set these if you want to customize llama-stack's behavior +# OLLAMA_URL=http://localhost:11434 +# CHROMADB_URL=http://localhost:8000 + diff --git a/docker/.gitignore b/docker/.gitignore new file mode 100644 index 0000000..4c49bd7 --- /dev/null +++ b/docker/.gitignore @@ -0,0 +1 @@ +.env diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml new file mode 100644 index 0000000..9af9646 --- /dev/null +++ b/docker/docker-compose.yaml @@ -0,0 +1,21 @@ +services: + cc-vec-bot: + build: + context: .. + dockerfile: docker/Dockerfile.cc-vec-bot + args: + PREFETCH_MODEL: ${PREFETCH_MODEL:-0} + INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} + image: cc-vec-bot + environment: + LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001} + CHATBOT_PORT: ${CHATBOT_PORT:-8008} + OLLAMA_PORT: ${OLLAMA_PORT:-11434} + ports: + - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" + - "${CHATBOT_PORT:-8008}:${CHATBOT_PORT:-8008}" + - "${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}" + volumes: + - ~/.llama:/root/.llama + command: ["--port", "${LLAMA_STACK_PORT:-5001}"] + From 9321c074a7a625f41478d48aeb97ea48cbf734de Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 13:50:51 +0100 Subject: [PATCH 06/11] feat: chatbot streaming, readme cleanup --- chatbot-frontend/README.md | 30 ++++++++--------------- chatbot-frontend/api.py | 47 +++++++++++++++++++++--------------- chatbot-frontend/index.html | 40 ++++++++++++++++++++++++++++-- docker/Dockerfile.cc-vec-bot | 7 ++---- docker/README.md | 11 +++++++++ docker/docker-compose.yaml | 2 +- 6 files changed, 89 insertions(+), 48 deletions(-) create mode 100644 docker/README.md diff --git a/chatbot-frontend/README.md b/chatbot-frontend/README.md index 70f1e4d..3b06cc1 100644 --- a/chatbot-frontend/README.md +++ b/chatbot-frontend/README.md @@ -1,32 +1,22 @@ -# Minimal Llama Chatbot Frontend +# Minimal CC Chatbot Frontend -This directory contains a minimal web-based chatbot UI and a FastAPI backend that proxies requests to an Ollama (Llama) backend. +## Quickstart -## Usage - -1. **Install dependencies** +Launch ollama server, then run: ```bash pip install -r requirements.txt -``` - -2. **Run the backend** - -```bash uvicorn api:app --reload ``` -3. **Open the frontend** - -Open `index.html` in your browser (or serve it with any static file server). +Click on the link that uvicorn prints in your terminal to open the frontend. ## Configuration -- The backend expects an Ollama server running at `http://localhost:11434` by default. -- You can override the Ollama URL and model with environment variables: - - `OLLAMA_URL` (e.g. `http://localhost:11434/api/generate`) - - `INFERENCE_MODEL` (e.g. `tinyllama`) +Configuration is done via environment variables in `api.py`. Defaults: -## Notes -- The backend is intentionally minimal and does not persist chat history. -- The frontend is pure HTML/JS, no frameworks or build step required. 
+```bash +OLLAMA_URL=http://localhost:11434 # /api/generate is appended +INFERENCE_MODEL=tinyllama +STREAMING=1 +```` diff --git a/chatbot-frontend/api.py b/chatbot-frontend/api.py index 992af70..0091cbc 100644 --- a/chatbot-frontend/api.py +++ b/chatbot-frontend/api.py @@ -7,7 +7,7 @@ app = FastAPI() OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434") + "/api/generate" INFERENCE_MODEL = os.environ.get("INFERENCE_MODEL", "tinyllama") -STREAMING = os.environ.get("OLLAMA_STREAMING", "0") != "0" +STREAMING = os.environ.get("OLLAMA_STREAMING", "1") != "0" @app.get("/") async def serve_index(): @@ -19,24 +19,31 @@ async def chat(request: Request): data = await request.json() prompt = data.get("message", "") payload = {"model": INFERENCE_MODEL, "prompt": prompt, "stream": STREAMING} - async with httpx.AsyncClient() as client: - r = await client.post(OLLAMA_URL, json=payload, timeout=None) - r.raise_for_status() - if STREAMING: - # Stream JSON lines to the frontend as a single event stream - async def event_stream(): - async for line in r.aiter_lines(): - line = line.strip() - print(line) - if not line: - continue - try: - obj = json.loads(line) - # Only send the 'response' field - yield f"data: {json.dumps({'response': obj.get('response', '')})}\n\n" - except Exception: - continue - return StreamingResponse(event_stream(), media_type="text/event-stream") - else: + + if STREAMING: + # Stream JSON lines to the frontend as SSE + async def event_stream(): + async with httpx.AsyncClient(timeout=None) as client: + async with client.stream("POST", OLLAMA_URL, json=payload) as r: + r.raise_for_status() + async for line in r.aiter_lines(): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + # Only send the 'response' field + response_text = obj.get('response', '') + if response_text: + yield f"data: {json.dumps({'response': response_text})}\n\n" + except Exception as e: + print(f"Error parsing line: {e}") + continue + return StreamingResponse(event_stream(), media_type="text/event-stream") + else: + # Non-streaming mode + async with httpx.AsyncClient(timeout=None) as client: + r = await client.post(OLLAMA_URL, json=payload) + r.raise_for_status() result = r.json() return JSONResponse({"response": result.get("response", "")}) diff --git a/chatbot-frontend/index.html b/chatbot-frontend/index.html index 282f323..a3076df 100644 --- a/chatbot-frontend/index.html +++ b/chatbot-frontend/index.html @@ -22,13 +22,49 @@
Creative Commons Chatbot
if (!msg) return; chat.value += "You: " + msg + "\n"; input.value = ""; + const res = await fetch('/api/chat', { method: 'POST', headers: {'Content-Type': 'application/json'}, body: JSON.stringify({message: msg}) }); - const data = await res.json(); - chat.value += "Bot: " + data.response + "\n"; + + // Check if response is streaming (SSE) or regular JSON + const contentType = res.headers.get('content-type'); + if (contentType && contentType.includes('text/event-stream')) { + // Handle streaming response + chat.value += "Bot: "; + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + while (true) { + const {done, value} = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, {stream: true}); + const lines = buffer.split('\n'); + buffer = lines.pop(); // Keep incomplete line in buffer + + for (const line of lines) { + if (line.startsWith('data: ')) { + try { + const data = JSON.parse(line.slice(6)); + chat.value += data.response || ''; + chat.scrollTop = chat.scrollHeight; + } catch (e) { + // Skip malformed JSON + } + } + } + } + chat.value += "\n"; + } else { + // Handle non-streaming JSON response + const data = await res.json(); + chat.value += "Bot: " + data.response + "\n"; + } + chat.scrollTop = chat.scrollHeight; } document.getElementById('input').addEventListener('keydown', e => { diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot index d6a71a6..f2f7271 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/docker/Dockerfile.cc-vec-bot @@ -5,7 +5,7 @@ USER root # Install minimal dependencies required by the Ollama install script RUN apt-get update \ - && apt-get install -y --no-install-recommends curl ca-certificates gnupg zstd \ + && apt-get install -y --no-install-recommends curl ca-certificates gnupg zstd python3-pip \ && rm -rf /var/lib/apt/lists/* # Install Ollama (for local inference when OLLAMA_URL is not set) @@ -82,10 +82,7 @@ ENV CHATBOT_PORT=8008 WORKDIR /opt/chatbot-frontend COPY chatbot-frontend . -RUN apt-get update && apt-get install -y python3-pip && \ - pip3 install --no-cache-dir -r requirements.txt && \ - apt-get remove -y python3-pip && apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* +RUN pip3 install --no-cache-dir -r requirements.txt # --------------------------------------------------------------------------- # Construct entrypoint script diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..0ea6a30 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,11 @@ +# CC chatbot docker setup + +## Quickstart + +To run with default configuration (internal ollama, tinyllama baked into image, chatbot on http://localhost:8008), run: + +`docker-compose up --build` + +## Configuration + +Copy `.env.sample` to `.env` and modify as needed to customize configuration. diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 9af9646..0c31430 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -4,7 +4,7 @@ services: context: .. 
dockerfile: docker/Dockerfile.cc-vec-bot args: - PREFETCH_MODEL: ${PREFETCH_MODEL:-0} + PREFETCH_MODEL: ${PREFETCH_MODEL:-1} INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} image: cc-vec-bot environment: From 01ce32c4994ce38134ef083e1895de70aabd892c Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 13:57:44 +0100 Subject: [PATCH 07/11] chore: simplify dockerfile by making entrypoint.sh its own file --- docker/.env.sample | 3 ++ docker/Dockerfile.cc-vec-bot | 101 ++--------------------------------- docker/docker-compose.yaml | 10 ++-- docker/entrypoint.sh | 93 ++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 100 deletions(-) create mode 100644 docker/entrypoint.sh diff --git a/docker/.env.sample b/docker/.env.sample index 8ce777f..3e7be46 100644 --- a/docker/.env.sample +++ b/docker/.env.sample @@ -17,6 +17,9 @@ LLAMA_STACK_PORT=5001 CHATBOT_PORT=8008 OLLAMA_PORT=11434 +# Streaming mode for chatbot responses (0=off, 1=on) +OLLAMA_STREAMING=1 + # External ollama/chromadb URL # Set these if you want to customize llama-stack's behavior # OLLAMA_URL=http://localhost:11434 diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot index f2f7271..30f729e 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/docker/Dockerfile.cc-vec-bot @@ -76,6 +76,9 @@ ENV LLAMA_STACK_PORT=5001 ENV OLLAMA_PORT=11434 ENV CHATBOT_PORT=8008 +# Streaming mode (0=off, 1=on) +ENV OLLAMA_STREAMING=1 + # --------------------------------------------------------------------------- # Install chatbot-frontend # --------------------------------------------------------------------------- @@ -85,104 +88,10 @@ COPY chatbot-frontend . RUN pip3 install --no-cache-dir -r requirements.txt # --------------------------------------------------------------------------- -# Construct entrypoint script +# Copy and setup entrypoint script # --------------------------------------------------------------------------- -# Create entrypoint script compatible with distribution-ollama CLI args -RUN cat <<'EOF' > /entrypoint.sh -#!/bin/bash -set -e - -# Parse --port and --env arguments (compatible with distribution-ollama) -while [[ $# -gt 0 ]]; do - case $1 in - --port) - LLAMA_STACK_PORT="$2" - shift 2 - ;; - --env) - # Parse KEY=VALUE and export it - if [[ "$2" =~ ^([^=]+)=(.*)$ ]]; then - export "${BASH_REMATCH[1]}"="${BASH_REMATCH[2]}" - fi - shift 2 - ;; - *) - # Unknown option, pass through - EXTRA_ARGS+=("$1") - shift - ;; - esac -done - -echo "==============================================" -echo "cc-vec-bot (llama-stack + ollama)" -echo "==============================================" -echo "LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001}" -echo "INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama}" -echo "OLLAMA_URL: ${OLLAMA_URL:-}" -echo "==============================================" - -# Determine Ollama URL -if [ -z "$OLLAMA_URL" ]; then - # No external Ollama URL provided - start local Ollama - echo "Starting local Ollama server..." - ollama serve & - OLLAMA_PID=$! - OLLAMA_URL="http://localhost:11434" - export OLLAMA_URL - - # Wait for Ollama to be ready - echo "Waiting for Ollama to start..." 
- for i in {1..30}; do - if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then - echo "Ollama is ready" - break - fi - if [ $i -eq 30 ]; then - echo "ERROR: Ollama failed to start" - exit 1 - fi - sleep 1 - done - - # Pull model if specified - if [ -n "$INFERENCE_MODEL" ]; then - echo "Pulling model: $INFERENCE_MODEL" - ollama pull "$INFERENCE_MODEL" - fi -else - # External Ollama URL provided - verify connectivity - echo "Using external Ollama at: $OLLAMA_URL" - for i in {1..10}; do - if curl -s "${OLLAMA_URL}/api/tags" >/dev/null 2>&1; then - echo "External Ollama is reachable" - break - fi - if [ $i -eq 10 ]; then - echo "WARNING: Cannot reach external Ollama at $OLLAMA_URL" - fi - sleep 1 - done -fi - -# Start the chatbot in the background -echo "Starting chatbot-frontend..." -(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port ${CHATBOT_PORT:-8000} &) -#(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port ${CHATBOT_PORT:-8000}) -CHATBOT_PID=$! -echo "Chatbot-frontend started with PID ${CHATBOT_PID}" - -# Start llama-stack server -# The distribution-starter base image includes llama-stack -echo "Starting llama-stack on port ${LLAMA_STACK_PORT}..." -export OLLAMA_URL="${OLLAMA_URL}" -export INFERENCE_MODEL="${INFERENCE_MODEL}" -exec llama stack run starter \ - --port "${LLAMA_STACK_PORT}" \ - "${EXTRA_ARGS[@]}" -EOF - +COPY docker/entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh # Volume for llama-stack config and model cache diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 0c31430..e835e85 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -11,11 +11,13 @@ services: LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001} CHATBOT_PORT: ${CHATBOT_PORT:-8008} OLLAMA_PORT: ${OLLAMA_PORT:-11434} + OLLAMA_STREAMING: ${OLLAMA_STREAMING:-1} + INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} ports: - - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" - - "${CHATBOT_PORT:-8008}:${CHATBOT_PORT:-8008}" - - "${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}" + - "${LLAMA_STACK_PORT}:${LLAMA_STACK_PORT}" + - "${CHATBOT_PORT}:${CHATBOT_PORT}" + - "${OLLAMA_PORT}:${OLLAMA_PORT}" volumes: - ~/.llama:/root/.llama - command: ["--port", "${LLAMA_STACK_PORT:-5001}"] + command: ["--port", "${LLAMA_STACK_PORT}"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..cd08123 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,93 @@ +#!/bin/bash +set -e + +EXTRA_ARGS=() +while [[ $# -gt 0 ]]; do + case $1 in + --port) + LLAMA_STACK_PORT="$2" + shift 2 + ;; + *) + # Unknown option, pass through + EXTRA_ARGS+=("$1") + shift + ;; + esac +done + +echo "==============================================" +echo "cc-vec-bot (llama-stack + ollama)" +echo "==============================================" +echo "LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001}" +echo "INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama}" +echo "OLLAMA_URL: ${OLLAMA_URL:-}" +echo "CHATBOT_PORT: ${CHATBOT_PORT:-8008}" +echo "==============================================" + +# Determine Ollama URL +if [ -z "$OLLAMA_URL" ]; then + # No external Ollama URL provided - start local Ollama + echo "Starting local Ollama server..." + ollama serve & + OLLAMA_PID=$! + OLLAMA_URL="http://localhost:11434" + export OLLAMA_URL + + # Wait for Ollama to be ready + echo "Waiting for Ollama to start..." 
+ for i in {1..30}; do + if curl -s http://localhost:11434/api/tags >/dev/null 2>&1; then + echo "Ollama is ready" + break + fi + if [ $i -eq 30 ]; then + echo "ERROR: Ollama failed to start" + exit 1 + fi + sleep 1 + done + + # Pull model if specified and PREFETCH_MODEL wasn't set at build time + if [ -n "$INFERENCE_MODEL" ]; then + echo "Checking if model needs to be pulled: $INFERENCE_MODEL" + if ! ollama list | grep -q "^${INFERENCE_MODEL}"; then + echo "Pulling model: $INFERENCE_MODEL" + ollama pull "$INFERENCE_MODEL" + else + echo "Model $INFERENCE_MODEL already available" + fi + fi +else + # External Ollama URL provided - verify connectivity + echo "Using external Ollama at: $OLLAMA_URL" + for i in {1..10}; do + if curl -s "${OLLAMA_URL}/api/tags" >/dev/null 2>&1; then + echo "External Ollama is reachable" + break + fi + if [ $i -eq 10 ]; then + echo "WARNING: Cannot reach external Ollama at $OLLAMA_URL" + fi + sleep 1 + done +fi + +# Start the chatbot in the background +echo "Starting chatbot-frontend on port ${CHATBOT_PORT}..." +(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port "${CHATBOT_PORT}" > /var/log/chatbot.log 2>&1 &) +CHATBOT_PID=$! +echo "Chatbot-frontend started with PID ${CHATBOT_PID}" + +# Give chatbot a moment to start +sleep 2 + +# Start llama-stack server +# The distribution-starter base image includes llama-stack +echo "Starting llama-stack on port ${LLAMA_STACK_PORT}..." +export OLLAMA_URL="${OLLAMA_URL}" +export INFERENCE_MODEL="${INFERENCE_MODEL}" +exec llama stack run starter \ + --port "${LLAMA_STACK_PORT}" \ + "${EXTRA_ARGS[@]}" + From 7c876f4a2f2adc59dfd0a84283b31cb1e43cff66 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 14:16:33 +0100 Subject: [PATCH 08/11] feat: external ollama; simplify instructions --- docker/.env.sample | 12 ++++++++---- docker/Dockerfile.cc-vec-bot | 1 - docker/README.md | 5 +++++ docker/docker-compose.yaml | 8 +++----- docker/entrypoint.sh | 8 +++++++- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/docker/.env.sample b/docker/.env.sample index 3e7be46..76d48a2 100644 --- a/docker/.env.sample +++ b/docker/.env.sample @@ -15,13 +15,17 @@ PREFETCH_MODEL=1 # Ports LLAMA_STACK_PORT=5001 CHATBOT_PORT=8008 -OLLAMA_PORT=11434 # Streaming mode for chatbot responses (0=off, 1=on) OLLAMA_STREAMING=1 -# External ollama/chromadb URL -# Set these if you want to customize llama-stack's behavior -# OLLAMA_URL=http://localhost:11434 +# Ollama URL (optional - defaults to local Ollama on port 11434) +# Leave empty to use built-in Ollama, or set to external instance +# Examples: +# OLLAMA_URL=http://host.docker.internal:11434 +# OLLAMA_URL=http://192.168.1.100:11435 +# OLLAMA_URL= + +# ChromaDB URL (optional - for persistent vector storage) # CHROMADB_URL=http://localhost:8000 diff --git a/docker/Dockerfile.cc-vec-bot b/docker/Dockerfile.cc-vec-bot index 30f729e..abbdfce 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/docker/Dockerfile.cc-vec-bot @@ -73,7 +73,6 @@ ENV INFERENCE_MODEL=${INFERENCE_MODEL} # Default ports ENV LLAMA_STACK_PORT=5001 -ENV OLLAMA_PORT=11434 ENV CHATBOT_PORT=8008 # Streaming mode (0=off, 1=on) diff --git a/docker/README.md b/docker/README.md index 0ea6a30..b75ca18 100644 --- a/docker/README.md +++ b/docker/README.md @@ -9,3 +9,8 @@ To run with default configuration (internal ollama, tinyllama baked into image, ## Configuration Copy `.env.sample` to `.env` and modify as needed to customize configuration. 
+ +Alternatively, set environment variables directly in your shell before running the `docker-compose up` command. For example: +```bash +OLLAMA_URL=http://host.docker.internal:11434 PREFETCH_MODEL=0 INFERENCE_MODEL=llama3.2:3B docker-compose up --build +``` \ No newline at end of file diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index e835e85..1a121eb 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -10,14 +10,12 @@ services: environment: LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001} CHATBOT_PORT: ${CHATBOT_PORT:-8008} - OLLAMA_PORT: ${OLLAMA_PORT:-11434} + OLLAMA_URL: ${OLLAMA_URL:-} OLLAMA_STREAMING: ${OLLAMA_STREAMING:-1} INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} ports: - - "${LLAMA_STACK_PORT}:${LLAMA_STACK_PORT}" - - "${CHATBOT_PORT}:${CHATBOT_PORT}" - - "${OLLAMA_PORT}:${OLLAMA_PORT}" + - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" + - "${CHATBOT_PORT:-8008}:${CHATBOT_PORT:-8008}" volumes: - ~/.llama:/root/.llama - command: ["--port", "${LLAMA_STACK_PORT}"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index cd08123..64d9eb9 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -75,7 +75,13 @@ fi # Start the chatbot in the background echo "Starting chatbot-frontend on port ${CHATBOT_PORT}..." -(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port "${CHATBOT_PORT}" > /var/log/chatbot.log 2>&1 &) +(cd /opt/chatbot-frontend && uvicorn api:app --host 0.0.0.0 --port "${CHATBOT_PORT}" &) +SUCCESS=$? +if [ $SUCCESS -ne 0 ]; then + cat /var/log/chatbot.log + echo "ERROR: Failed to start chatbot-frontend" + exit 1 +fi CHATBOT_PID=$! echo "Chatbot-frontend started with PID ${CHATBOT_PID}" From 057d1e2b39a871a805a5434203aa45d34ad25411 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 15 Jan 2026 14:25:15 +0100 Subject: [PATCH 09/11] chore: restructure --- {chatbot-frontend => cc-chatbot/chatbot}/README.md | 0 {chatbot-frontend => cc-chatbot/chatbot}/api.py | 0 {chatbot-frontend => cc-chatbot/chatbot}/index.html | 0 {chatbot-frontend => cc-chatbot/chatbot}/requirements.txt | 0 {docker => cc-chatbot/docker}/.env.sample | 0 {docker => cc-chatbot/docker}/.gitignore | 0 {docker => cc-chatbot/docker}/Dockerfile.cc-vec-bot | 2 +- {docker => cc-chatbot/docker}/README.md | 0 {docker => cc-chatbot/docker}/docker-compose.yaml | 0 {docker => cc-chatbot/docker}/entrypoint.sh | 0 10 files changed, 1 insertion(+), 1 deletion(-) rename {chatbot-frontend => cc-chatbot/chatbot}/README.md (100%) rename {chatbot-frontend => cc-chatbot/chatbot}/api.py (100%) rename {chatbot-frontend => cc-chatbot/chatbot}/index.html (100%) rename {chatbot-frontend => cc-chatbot/chatbot}/requirements.txt (100%) rename {docker => cc-chatbot/docker}/.env.sample (100%) rename {docker => cc-chatbot/docker}/.gitignore (100%) rename {docker => cc-chatbot/docker}/Dockerfile.cc-vec-bot (99%) rename {docker => cc-chatbot/docker}/README.md (100%) rename {docker => cc-chatbot/docker}/docker-compose.yaml (100%) rename {docker => cc-chatbot/docker}/entrypoint.sh (100%) diff --git a/chatbot-frontend/README.md b/cc-chatbot/chatbot/README.md similarity index 100% rename from chatbot-frontend/README.md rename to cc-chatbot/chatbot/README.md diff --git a/chatbot-frontend/api.py b/cc-chatbot/chatbot/api.py similarity index 100% rename from chatbot-frontend/api.py rename to cc-chatbot/chatbot/api.py diff --git a/chatbot-frontend/index.html b/cc-chatbot/chatbot/index.html similarity index 100% rename from chatbot-frontend/index.html rename to 
cc-chatbot/chatbot/index.html diff --git a/chatbot-frontend/requirements.txt b/cc-chatbot/chatbot/requirements.txt similarity index 100% rename from chatbot-frontend/requirements.txt rename to cc-chatbot/chatbot/requirements.txt diff --git a/docker/.env.sample b/cc-chatbot/docker/.env.sample similarity index 100% rename from docker/.env.sample rename to cc-chatbot/docker/.env.sample diff --git a/docker/.gitignore b/cc-chatbot/docker/.gitignore similarity index 100% rename from docker/.gitignore rename to cc-chatbot/docker/.gitignore diff --git a/docker/Dockerfile.cc-vec-bot b/cc-chatbot/docker/Dockerfile.cc-vec-bot similarity index 99% rename from docker/Dockerfile.cc-vec-bot rename to cc-chatbot/docker/Dockerfile.cc-vec-bot index abbdfce..0f0f81a 100644 --- a/docker/Dockerfile.cc-vec-bot +++ b/cc-chatbot/docker/Dockerfile.cc-vec-bot @@ -83,7 +83,7 @@ ENV OLLAMA_STREAMING=1 # --------------------------------------------------------------------------- WORKDIR /opt/chatbot-frontend -COPY chatbot-frontend . +COPY chatbot . RUN pip3 install --no-cache-dir -r requirements.txt # --------------------------------------------------------------------------- diff --git a/docker/README.md b/cc-chatbot/docker/README.md similarity index 100% rename from docker/README.md rename to cc-chatbot/docker/README.md diff --git a/docker/docker-compose.yaml b/cc-chatbot/docker/docker-compose.yaml similarity index 100% rename from docker/docker-compose.yaml rename to cc-chatbot/docker/docker-compose.yaml diff --git a/docker/entrypoint.sh b/cc-chatbot/docker/entrypoint.sh similarity index 100% rename from docker/entrypoint.sh rename to cc-chatbot/docker/entrypoint.sh From f884d91af784967ab19bb8d31ce7a6311b1fa6f5 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Mon, 19 Jan 2026 14:33:07 +0100 Subject: [PATCH 10/11] feat: docker-compose for convenience; README --- cc-chatbot/docker/README.md | 34 +++++++++++++++++++++++++++++++++- docker/docker-compose.yaml | 23 +++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 docker/docker-compose.yaml diff --git a/cc-chatbot/docker/README.md b/cc-chatbot/docker/README.md index b75ca18..90c3cb0 100644 --- a/cc-chatbot/docker/README.md +++ b/cc-chatbot/docker/README.md @@ -13,4 +13,36 @@ Copy `.env.sample` to `.env` and modify as needed to customize configuration. Alternatively, set environment variables directly in your shell before running the `docker-compose up` command. For example: ```bash OLLAMA_URL=http://host.docker.internal:11434 PREFETCH_MODEL=0 INFERENCE_MODEL=llama3.2:3B docker-compose up --build -``` \ No newline at end of file +``` + + +## Populating a vector store + +Spin up a llama-stack instance where you want the vector store to live: + +```bash +uv run --with llama-stack==0.4.1 llama stack run starter +``` + +Wait until you see the `Uvicorn running on ` message. 
Then, test everything works: + +```bash +# Set environment variables +export OPENAI_BASE_URL=http://localhost:8321/v1 +export OPENAI_API_KEY=none # Llama Stack doesn't require a real key +export OPENAI_EMBEDDING_MODEL=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 +export OPENAI_EMBEDDING_DIMENSIONS=768 + +# Set your Athena credentials +export ATHENA_OUTPUT_BUCKET=s3://cc-vec-damian-01/test-results +export AWS_PROFILE=cc-volunteers +export AWS_DEFAULT_REGION=us-east-1 + +# Use cc-vec with local models +uv run cc-vec index --url-patterns "%commoncrawl.org" --limit 10 +``` + +If it succeeds run again with `--limit 1000` to index everything + + +> Note: if running debug locally, llama stack needs additional pip packages `sentence-transformers einops` diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml new file mode 100644 index 0000000..d93e2b1 --- /dev/null +++ b/docker/docker-compose.yaml @@ -0,0 +1,23 @@ +services: + cc-vec-bot: + build: + context: .. + dockerfile: docker/Dockerfile.cc-vec-bot + args: + PREFETCH_MODEL: ${PREFETCH_MODEL:-1} + INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} + image: cc-vec-bot + environment: + LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001} + CHATBOT_PORT: ${CHATBOT_PORT:-8008} + OLLAMA_URL: ${OLLAMA_URL:-} + OLLAMA_STREAMING: ${OLLAMA_STREAMING:-1} + INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} + ports: + - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" + - "${CHATBOT_PORT:-8008}:${CHATBOT_PORT:-8008}" + - "11434:11434" + volumes: + - ~/.llama:/root/.llama + + From 8b0201eda31e54903bc8cb7153f54c423f0f3052 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Mon, 19 Jan 2026 17:02:13 +0100 Subject: [PATCH 11/11] wip: chatbot instructions iteration --- cc-chatbot/chatbot/README.md | 33 +++++++++++++++++++++++++ cc-chatbot/docker/Dockerfile.cc-vec-bot | 2 +- docker/docker-compose.yaml | 23 ----------------- 3 files changed, 34 insertions(+), 24 deletions(-) delete mode 100644 docker/docker-compose.yaml diff --git a/cc-chatbot/chatbot/README.md b/cc-chatbot/chatbot/README.md index 3b06cc1..b0313ab 100644 --- a/cc-chatbot/chatbot/README.md +++ b/cc-chatbot/chatbot/README.md @@ -20,3 +20,36 @@ OLLAMA_URL=http://localhost:11434 # /api/generate is appended INFERENCE_MODEL=tinyllama STREAMING=1 ```` + +## Manual / Development + +Make sure ollama is running, then open 2 terminal windows. + +In the first, launch llama stack configured to talk to ollama: + +```bash +OLLAMA_URL=http://localhost:11434/v1 uv run --with llama-stack==0.4.1 llama stack run starte +``` + +In the second, launch the cc chatbot: + +```bash +cd cc-chatbot/chatbot +OLLAMA_URL=http://localhost:8321 uvicorn api:app --reload +``` + +## Building the vector store + +Make sure ollama is running, then open 2 terminal windows. + +In the first, launch llama stack configured to talk to ollama: + +```bash +OLLAMA_URL=http://localhost:11434/v1 uv run --with llama-stack==0.4.1 llama stack run starte +``` + +In the second, run cc-vec: + +```bash +uv run cc-vec index --url-patterns "%commoncrawl.org" --limit 1000 --vector-store-name 'commoncrawl-org-v1' --chunk-size 800 --overlap 400 +``` diff --git a/cc-chatbot/docker/Dockerfile.cc-vec-bot b/cc-chatbot/docker/Dockerfile.cc-vec-bot index 0f0f81a..58d365f 100644 --- a/cc-chatbot/docker/Dockerfile.cc-vec-bot +++ b/cc-chatbot/docker/Dockerfile.cc-vec-bot @@ -20,7 +20,7 @@ ENV PATH="/usr/local/bin:${PATH}" # # Build examples: # docker build --build-arg PREFETCH_MODEL=1 -t cc-vec-bot . 
# bake tinyllama -# docker build --build-arg PREFETCH_MODEL=1 --build-arg INFERENCE_MODEL=llama3.2:3b -t cc-vec-bot . +# docker build --build-arg PREFETCH_MODEL=1 --build-arg INFERENCE_MODEL=llama3.2:3b -t cc-vec-bot . # bake llama3.2:3b # docker build -t cc-vec-bot . # no prefetch (default) # --------------------------------------------------------------------------- ARG PREFETCH_MODEL=0 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml deleted file mode 100644 index d93e2b1..0000000 --- a/docker/docker-compose.yaml +++ /dev/null @@ -1,23 +0,0 @@ -services: - cc-vec-bot: - build: - context: .. - dockerfile: docker/Dockerfile.cc-vec-bot - args: - PREFETCH_MODEL: ${PREFETCH_MODEL:-1} - INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} - image: cc-vec-bot - environment: - LLAMA_STACK_PORT: ${LLAMA_STACK_PORT:-5001} - CHATBOT_PORT: ${CHATBOT_PORT:-8008} - OLLAMA_URL: ${OLLAMA_URL:-} - OLLAMA_STREAMING: ${OLLAMA_STREAMING:-1} - INFERENCE_MODEL: ${INFERENCE_MODEL:-tinyllama} - ports: - - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" - - "${CHATBOT_PORT:-8008}:${CHATBOT_PORT:-8008}" - - "11434:11434" - volumes: - - ~/.llama:/root/.llama - -
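A quick way to smoke-test the chatbot added in this series, once the compose stack from cc-chatbot/docker is up, is a sketch like the following (assuming the defaults above: CHATBOT_PORT=8008 and OLLAMA_STREAMING=1):

```bash
# Hedged example: exercises the POST /api/chat endpoint defined in cc-chatbot/chatbot/api.py.
# With OLLAMA_STREAMING=1 (the default) the reply arrives as server-sent events; -N keeps
# curl from buffering the stream. Host and port depend on your .env.
curl -N -X POST http://localhost:8008/api/chat \
  -H 'Content-Type: application/json' \
  -d '{"message": "What is Common Crawl?"}'

# With OLLAMA_STREAMING=0 the same request returns a single JSON object instead:
# {"response": "..."}
```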