All configuration is done via environment variables. Copy .env.example to .env and customize.
| Variable | Description |
|---|---|
| HUGGINGFACE_TOKEN | Your HuggingFace API token |
| DATABASE_URL | Database connection string |
# Your HuggingFace token (get from https://huggingface.co/settings/tokens)
HUGGINGFACE_TOKEN=hf_xxxxx
# Model to use for SQL generation
TEXT2SQL_MODEL=Qwen/Qwen2.5-Coder-7B-Instruct

| Model | Size | Best For |
|---|---|---|
| Qwen/Qwen2.5-Coder-7B-Instruct | 7B | General SQL (recommended) |
| Qwen/Qwen2.5-Coder-32B-Instruct | 32B | Complex queries |
| meta-llama/Llama-3.2-3B-Instruct | 3B | Fast responses |
| deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | 32B | Advanced reasoning |
Uses HuggingFace's hosted inference. Free tier available.
AGENT_MODEL_BACKEND=hf_inference
AGENT_INFERENCE_PROVIDER= # Leave empty for auto-routing
AGENT_INFERENCE_TIMEOUT=120
AGENT_USE_LEGACY_FALLBACK=false

Important: Do NOT set AGENT_INFERENCE_PROVIDER=hf-inference - that is not a valid provider name. Leave it empty or use specific providers like nebius, together, fireworks.
Runs the model on your machine. Requires GPU for reasonable performance.
AGENT_MODEL_BACKEND=local
MODEL_DEVICE=cuda # cuda, cpu, mps, or auto
ENABLE_8BIT_QUANTIZATION=false # Reduces memory usage
ENABLE_4BIT_QUANTIZATION=false # Maximum compression

# Connection string (supports PostgreSQL, MySQL, SQLite)
DATABASE_URL=postgresql://user:password@localhost:5432/mydb
# Connection pool
DB_POOL_SIZE=5
DB_MAX_OVERFLOW=10
DB_POOL_TIMEOUT=30

# PostgreSQL
DATABASE_URL=postgresql://user:pass@localhost:5432/dbname
# MySQL
DATABASE_URL=mysql://user:pass@localhost:3306/dbname
# SQLite
DATABASE_URL=sqlite:///./data/app.db
# PostgreSQL with SSL (Neon, Supabase, etc.)
DATABASE_URL=postgresql://user:pass@host.neon.tech/db?sslmode=require

AGENT_ENABLED=true # Enable the agent
AGENT_MAX_STEPS=5 # Max reasoning steps (3-5 recommended)
AGENT_MIN_CONFIDENCE=0.7 # Minimum confidence threshold
AGENT_ENABLE_VALIDATION=true # Validate SQL before returning
AGENT_EXECUTION_TIMEOUT=30 # Query execution timeout (seconds)

API_HOST=0.0.0.0
API_PORT=8000
API_DEBUG=false
# CORS (comma-separated origins)
CORS_ORIGINS=http://localhost:3000,http://localhost:8080

SECRET_KEY=your-secret-key-here # For JWT tokens
# Enable auth enforcement
AUTH_ENABLED=true
JWT_AUTH_ENABLED=true
API_KEY_AUTH_ENABLED=true
# API keys (comma-separated, optional scopes)
API_KEYS=key-1:read|write
API_KEY_SCOPES=read
# Optional local user credentials for issuing JWTs
# Format: username:password:scope1|scope2;username2:password:scope1|scope2
AUTH_USERS=admin:change_me:admin|write|read
# JWT claim names for scopes/roles
JWT_SCOPES_CLAIM=scopes
JWT_ROLE_CLAIM=role
# Scopes required for mutation/management endpoints
MUTATION_SCOPES=write,admin
# Rate limiting
RATE_LIMIT_PER_MINUTE=60
RATE_LIMIT_BURST=10
RATE_LIMIT_STORAGE_URL=redis://localhost:6379/1
RATE_LIMIT_HEADERS_ENABLED=true

ENABLE_METRICS=true
METRICS_PORT=9090
ENABLE_TRACING=false
OTLP_ENDPOINT=http://localhost:4317

# Toggle caching (schema/prompt/inference outputs)
CACHE_ENABLED=true
# Optional Redis backend for shared cache
REDIS_URL=redis://localhost:6379/0
# TTLs (seconds)
CACHE_TTL=3600
CACHE_QUERY_TTL=3600
CACHE_MODEL_TTL=7200
CACHE_SCHEMA_TTL=86400
# In-memory cache size when Redis unavailable
CACHE_MAX_MEMORY_ENTRIES=1000

MULTIDB_ENABLED=true
MULTIDB_MAX_DATABASES=50
MULTIDB_DEFAULT_POOL_SIZE=5
MULTIDB_HEALTH_CHECK_INTERVAL=60

FEWSHOT_ENABLED=true
FEWSHOT_EMBEDDING_STRATEGY=hash

# Required
HUGGINGFACE_TOKEN=hf_xxxxx
DATABASE_URL=postgresql://user:pass@localhost:5432/mydb
# Inference (HF API - recommended)
TEXT2SQL_MODEL=Qwen/Qwen2.5-Coder-7B-Instruct
AGENT_MODEL_BACKEND=hf_inference
AGENT_INFERENCE_PROVIDER=
AGENT_INFERENCE_TIMEOUT=120
# Agent
AGENT_ENABLED=true
AGENT_MAX_STEPS=5
AGENT_MIN_CONFIDENCE=0.7
# API
API_HOST=0.0.0.0
API_PORT=8000