# Source: Orpheus-FastAPI/.env.example (Lex-au/Orpheus-FastAPI, main branch)
# Orpheus-FastAPI Configuration
# Copy this file to .env and customize as needed

# Server connection settings
# Endpoint of the inference server that requests are sent to.
ORPHEUS_API_URL=http://127.0.0.1:1234/v1/completions
# Request timeout (presumably in seconds - confirm against the server code).
# You should scale this value based on max tokens and your inference speed.
# NOTE: keep comments on their own line. Some env-file loaders (for example
# `docker run --env-file`) do not strip trailing "# ..." text and would treat
# it as part of the value.
ORPHEUS_API_TIMEOUT=120

# Generation parameters
# NOTE: comments are kept on their own lines. Some env-file loaders (for
# example `docker run --env-file`) do not strip trailing "# ..." text and
# would treat it as part of the value.
# If you want longer completions, increase this value.
ORPHEUS_MAX_TOKENS=8192
ORPHEUS_TEMPERATURE=0.6
ORPHEUS_TOP_P=0.9
# Repetition penalty is now hardcoded to 1.1 for stability (this is a model constraint) - this setting is no longer used
# ORPHEUS_REPETITION_PENALTY=1.1
ORPHEUS_SAMPLE_RATE=24000
# Model name sent to the inference server (Q2_K, Q4_K_M, or Q8_0 variants).
ORPHEUS_MODEL_NAME=Orpheus-3b-FT-Q8_0.gguf

# Web UI settings (keep in mind that the web UI is not secure and should not be exposed to the internet)
# Presumably the port the web UI / API server listens on - confirm against the app's startup code.
ORPHEUS_PORT=5005
# NOTE(review): assuming this is used as the bind address, 0.0.0.0 listens on
# every network interface, so the UI is reachable from other machines on the
# network. Use 127.0.0.1 to restrict access to this machine only.
ORPHEUS_HOST=0.0.0.0