diff --git a/docs.json b/docs.json index d2cb9edd..b067b563 100644 --- a/docs.json +++ b/docs.json @@ -174,6 +174,36 @@ } ] }, + { + "tab": "CometChat On-Prem", + "dropdowns": [ + { + "dropdown": "Docker", + "icon": "/images/icons/docker.svg", + "pages": [ + "fundamentals/cometchat-on-prem/docker/overview", + "fundamentals/cometchat-on-prem/docker/prerequisites", + "fundamentals/cometchat-on-prem/docker/quick-start", + "fundamentals/cometchat-on-prem/docker/production-deployment", + "fundamentals/cometchat-on-prem/docker/configuration-reference", + "fundamentals/cometchat-on-prem/docker/security", + "fundamentals/cometchat-on-prem/docker/persistence-and-backup", + "fundamentals/cometchat-on-prem/docker/monitoring", + "fundamentals/cometchat-on-prem/docker/scaling", + "fundamentals/cometchat-on-prem/docker/upgrades", + "fundamentals/cometchat-on-prem/docker/air-gapped-deployment", + "fundamentals/cometchat-on-prem/docker/troubleshooting" + ] + }, + { + "dropdown": "Kubernetes", + "icon": "/images/icons/kubernetes.svg", + "pages": [ + "fundamentals/cometchat-on-prem/kubernetes/overview" + ] + } + ] + }, { "tab": "Widget Builder", "dropdowns": [ @@ -837,7 +867,7 @@ "icon": "/images/icons/react.svg", "versions": [ { - "version": "v5\u200e", + "version": "v5‎", "groups": [ { "group": " ", @@ -924,7 +954,7 @@ ] }, { - "version": "v4\u200e", + "version": "v4‎", "groups": [ { "group": " ", @@ -1087,7 +1117,7 @@ ] }, { - "version": "v3\u200e", + "version": "v3‎", "groups": [ { "group": " ", @@ -1109,7 +1139,7 @@ ] }, { - "version": "v2\u200e", + "version": "v2‎", "groups": [ { "group": " ", @@ -1137,7 +1167,7 @@ "icon": "/images/icons/swift.svg", "versions": [ { - "version": "v5\u200e\u200e", + "version": "v5‎‎", "groups": [ { "group": " ", @@ -1242,7 +1272,7 @@ ] }, { - "version": "v4\u200e\u200e", + "version": "v4‎‎", "groups": [ { "group": " ", @@ -1406,7 +1436,7 @@ ] }, { - "version": "v3\u200e\u200e", + "version": "v3‎‎", "groups": [ { "group": " ", @@ -1428,7 +1458,7 
@@ ] }, { - "version": "v2\u200e\u200e", + "version": "v2‎‎", "groups": [ { "group": " ", @@ -1456,7 +1486,7 @@ "icon": "/images/icons/android.svg", "versions": [ { - "version": "v5\u200e\u200e\u200e", + "version": "v5‎‎‎", "groups": [ { "group": " ", @@ -1559,7 +1589,7 @@ ] }, { - "version": "v4\u200e\u200e\u200e", + "version": "v4‎‎‎", "groups": [ { "group": " ", @@ -1716,7 +1746,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e", + "version": "v3‎‎‎", "groups": [ { "group": " ", @@ -1741,7 +1771,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e", + "version": "v2‎‎‎", "groups": [ { "group": " ", @@ -1770,7 +1800,7 @@ "icon": "/images/icons/flutter.svg", "versions": [ { - "version": "v5\u200e\u200e\u200e\u200e", + "version": "v5‎‎‎‎", "groups": [ { "group": " ", @@ -1876,7 +1906,7 @@ ] }, { - "version": "v4\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎", "groups": [ { "group": " ", @@ -2052,7 +2082,7 @@ "icon": "/images/icons/angular.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎", "groups": [ { "group": " ", @@ -2242,7 +2272,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎", "groups": [ { "group": " ", @@ -2263,7 +2293,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎", "groups": [ { "group": " ", @@ -2290,7 +2320,7 @@ "icon": "/images/icons/vuejs.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2383,7 +2413,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2405,7 +2435,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2437,7 +2467,7 @@ "icon": "/images/icons/js.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2570,7 +2600,7 @@ ] }, { - 
"version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2693,7 +2723,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2796,7 +2826,7 @@ "icon": "/images/icons/react.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -2911,7 +2941,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3025,7 +3055,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3127,7 +3157,7 @@ "icon": "/images/icons/swift.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3250,7 +3280,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3368,7 +3398,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3475,7 +3505,7 @@ "icon": "/images/icons/android.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3590,7 +3620,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3710,7 +3740,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3813,7 +3843,7 @@ "icon": "/images/icons/flutter.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": 
"v4‎‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -3927,7 +3957,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -4032,7 +4062,7 @@ "icon": "/images/icons/ionic.svg", "versions": [ { - "version": "v4\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v4‎‎‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -4140,7 +4170,7 @@ ] }, { - "version": "v3\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v3‎‎‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -4250,7 +4280,7 @@ ] }, { - "version": "v2\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e\u200e", + "version": "v2‎‎‎‎‎‎‎‎‎‎‎‎", "groups": [ { "group": " ", @@ -5005,20 +5035,20 @@ ] }, { - "tab": "Custom Bots", + "tab": "Custom Bots", "hidden": true, "pages": [ "/ai-chatbots/custom-bots" ] }, { - "tab": "AI Bots (Legacy)", + "tab": "AI Bots (Legacy)", "hidden": true, "pages": [ - "/ai-chatbots/ai-bots/overview", - "/ai-chatbots/ai-bots/instructions", - "/ai-chatbots/ai-bots/bots" - ] + "/ai-chatbots/ai-bots/overview", + "/ai-chatbots/ai-bots/instructions", + "/ai-chatbots/ai-bots/bots" + ] } ] }, @@ -5716,7 +5746,7 @@ { "source": "/ai-agents/tools", "destination": "/ai-agents/mastra-tools" - }, + }, { "source": "/ai-chatbots/overview", "destination": "/ai-chatbots/ai-bots/overview" @@ -5740,7 +5770,7 @@ "metatags": { "charset": "UTF-8", "viewport": "width=device-width, initial-scale=1.0", - "description": "Learn how to integrate, customize, and scale real-time chat using CometChat\u2019s UI Kits, SDKs, and widgets across popular frameworks. Get step-by-step guides, best practices, and implementation details to build production-ready chat experiences.", + "description": "Learn how to integrate, customize, and scale real-time chat using CometChat’s UI Kits, SDKs, and widgets across popular frameworks. 
Get step-by-step guides, best practices, and implementation details to build production-ready chat experiences.", "language": "en" } }, @@ -5749,4 +5779,4 @@ "redirect": true } } -} +} \ No newline at end of file diff --git a/fundamentals/cometchat-on-prem/docker/air-gapped-deployment.mdx b/fundamentals/cometchat-on-prem/docker/air-gapped-deployment.mdx new file mode 100644 index 00000000..62d47786 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/air-gapped-deployment.mdx @@ -0,0 +1,23 @@ +--- +title: "Air-Gapped Deployment" +sidebarTitle: "Air-Gapped" +--- + +Guidelines for deploying the platform in offline or isolated (air-gapped) environments. + +## Offline installation steps + +- Export required Docker images with `docker save` +- Transfer images via removable media, secure copy (SSH), or an isolated internal network +- Import images on the target system with `docker load` + +## Local registry + +- Host images in Harbor, Nexus, or a private Docker registry +- Enforce role-based access control (RBAC) and image retention policies + +## Limitations in air-gapped mode + +- No access to external push notification services +- No S3 or other cloud object storage unless internally emulated +- No cloud-hosted analytics, logging, or monitoring integrations diff --git a/fundamentals/cometchat-on-prem/docker/configuration-reference.mdx b/fundamentals/cometchat-on-prem/docker/configuration-reference.mdx new file mode 100644 index 00000000..ce0ea5ae --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/configuration-reference.mdx @@ -0,0 +1,119 @@ +--- +title: "Configuration Reference" +sidebarTitle: "Configuration" +--- + +Use this reference when updating domains, migrating environments, troubleshooting misconfiguration, or performing production deployments. Values are sourced from `docker-compose.yml`, service-level `.env` files, and the domain update guide. 
+ +Use this when: +- Updating domains +- Migrating environments +- Troubleshooting service misconfiguration +- Performing production deployments + +## Global notes + +- All services read environment variables from their respective directories. +- Domain values must be updated consistently across API, WebSocket, Notifications, Webhooks, and NGINX configurations. +- Changing the primary domain impacts reverse proxy routing, OAuth headers, CORS, webhook endpoints, and TiDB host references. + +## Chat API + +Update these values when changing domains: + +- `MAIN_DOMAIN=""` +- `EXTENSION_DOMAIN=""` +- `WEBHOOKS_BASE_URL="https://webhooks./v1/webhooks"` +- `TRIGGERS_BASE_URL="https://webhooks./v1/triggers"` +- `EXTENSION_BASE_URL="https://notifications."` +- `MODERATION_ENABLED=true` +- `RULES_BASE_URL="https://moderation./v1/moderation-service"` +- `ADMIN_API_HOST="api."` +- `CLIENT_API_HOST="apiclient."` +- `ALLOWED_API_DOMAINS=","` +- `DB_HOST="tidb."` +- `DB_HOST_CREATOR="tidb."` +- `V3_CHAT_HOST="websocket."` + +## Management API (MGMT API) + +- `ADMIN_API_HOST="api."` +- `CLIENT_API_HOST="apiclient."` +- `APP_HOST="dashboard."` +- `API_HOST="https://mgmt-api."` +- `MGMT_DOMAIN=""` +- `MGMT_DOMAIN_TO_REPLACE=""` +- `RULES_BASE_URL="https://moderation./v1/moderation"` +- `ACCESS_CONTROL_ALLOW_ORIGIN=","` + +## WebSocket + +Hostnames are derived automatically from NGINX and Chat API configuration; no manual domain updates are required. + +## Notifications service + +- `CC_DOMAIN=""` (controls routing, token validation, and push delivery) + +## Moderation service + +- `CHAT_API_URL=""` for rule evaluation, metadata retrieval, and decision submission + +## Webhooks service + +- `CHAT_API_DOMAIN=""` - must match the Chat API domain exactly to avoid retries or signature verification failures + +## Extensions + +```json +"DOMAINS": [ + "", + "", + "" +], +"DOMAIN_NAME": "" +``` + +Defines CORS and allowed origins for extension traffic. 
+ +## Receipt Updater + +- `RECEIPTS_MYSQL_HOST="tidb."` for delivery receipts, read receipts, and thread metadata + +## SQL Consumer + +```json +"CONNECTION_CONFIG": { + "host": "" +}, +"ALTER_USER_CONFIG": { + "host": "" +}, +"API_CONFIG": { + "API_DOMAIN": "" +} +``` + +Controls database migrations, multi-tenant provisioning, and internal requests to Chat API. + +## NGINX configuration files + +Update domain values in: + +- chatapi.conf +- extensions.conf +- mgmtapi.conf +- notifications.conf +- dashboard.conf +- globalwebhooks.conf +- moderation.conf +- websocket.conf + +These govern TLS termination, routing, reverse proxy rules, and WebSocket upgrades. + +## Summary of domain values to update + +- Chat API, Client API, and Management API +- Notifications, Moderation, Webhooks, and Extensions services +- NGINX reverse proxy hostnames +- TiDB host references +- WebSocket host configuration in Chat API diff --git a/fundamentals/cometchat-on-prem/docker/monitoring.mdx b/fundamentals/cometchat-on-prem/docker/monitoring.mdx new file mode 100644 index 00000000..e858bb3c --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/monitoring.mdx @@ -0,0 +1,28 @@ +--- +title: "Monitoring" +sidebarTitle: "Monitoring" +--- + +Monitoring ensures system health, operational visibility, and SLA compliance. + +## Observability stack + +- Prometheus for metrics collection +- Grafana for dashboards and visualizations +- Loki (or ELK) for centralized log aggregation + +## Key service metrics to track + +- Kafka consumer lag +- WebSocket active connection count +- Redis memory utilization and cache hit ratio +- TiDB region health and TiKV store availability + +## Alerting recommendations + +- Sustained CPU utilization above 80% +- Database query latency exceeding 100 ms +- Kafka consumer lag breaching defined thresholds +- WebSocket connection drops or abnormal failure rate spikes + +Tune thresholds based on workload characteristics and traffic patterns. 
diff --git a/fundamentals/cometchat-on-prem/docker/overview.mdx b/fundamentals/cometchat-on-prem/docker/overview.mdx new file mode 100644 index 00000000..9c02c82b --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/overview.mdx @@ -0,0 +1,54 @@ +--- +title: "CometChat On-Prem Overview" +sidebarTitle: "Overview" +--- + +CometChat On-Prem is an enterprise deployment and operations blueprint for a high-performance, real-time messaging platform built for reliability, low latency, and horizontal scale. It covers deployments from roughly 10k MAU up to 250k+ MAU and establishes the foundations for even higher workloads. + +## Who this guide is for + +- DevOps and SRE teams responsible for uptime and operations +- Platform, cloud, and backend engineers deploying or tuning the stack +- Infrastructure architects planning multi-region, failover, or compliance-heavy environments + +## What the platform does + +- Real-time messaging for 1:1 and group chat with persistent history +- WebSocket event streaming for presence, typing indicators, and delivery/read receipts +- Distributed event pipeline (Kafka) for decoupled microservices communication +- Notifications subsystem for asynchronous push fan-out +- Moderation services with rule-based filtering and optional AI adapters +- Webhooks engine for outbound callbacks with retries and signature validation +- Horizontally scalable REST APIs for chat, users, groups, and metadata + +## Data & storage + +- TiDB cluster (PD, TiKV, TiDB SQL) as the primary relational store for users, conversations, groups, and message metadata +- MongoDB for flexible metadata, moderation data, and unstructured fields +- Three Redis clusters for caching, pub/sub, session state, and other fast-access needs +- Kafka as the event backbone for real-time messaging and inter-service pipelines +- Optional object storage (e.g., Amazon S3, MinIO, Ceph) for media, logs, documents, and other large binaries when your application handles unstructured data across 
services + +## Deployment models + +- Local development (Docker Compose): single-machine environment for dependency bootstrapping, local development/QA, and CI pipelines. Not recommended for production workloads. +- Docker Swarm (recommended up to ~200k MAU / ~20k PCC): current reference architecture with lightweight cluster management, predictable service placement, secure overlay networks, and rolling updates. +- Kubernetes (enterprise, multi-region, or >200k MAU): best when you need advanced autoscaling, cross-region failover, service mesh/mTLS, cloud-native Kafka, or strict compliance requirements. Contact us for enterprise Kubernetes architecture guidance. + +## High-level architecture + + + + + +- NGINX for TLS termination, routing, WebSocket upgrades, and load balancing +- WebSocket gateway for real-time connections, presence events, and device sessions +- Chat API for messaging logic across users, groups, conversations, and metadata +- Moderation engine for policy-based filtering and compliance checks +- Notifications service for asynchronous push notifications and event fan-out +- Webhooks service for outbound callbacks with retries +- Kafka as the central event backbone +- TiDB, MongoDB, and Redis as the stateful data stores +- Observability stack (Prometheus, Grafana, Loki/ELK) for metrics, dashboards, and logs +- Host and network: private overlay networks isolating backend traffic and optimizing latency + diff --git a/fundamentals/cometchat-on-prem/docker/persistence-and-backup.mdx b/fundamentals/cometchat-on-prem/docker/persistence-and-backup.mdx new file mode 100644 index 00000000..d9189588 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/persistence-and-backup.mdx @@ -0,0 +1,31 @@ +--- +title: "Persistence & Backup" +sidebarTitle: "Persistence & Backup" +--- + +Defines how persistent data is stored, backed up, and restored in production environments. 
+ +## Volume layout + +| Service | Default path | +| --- | --- | +| TiKV | `/data` | +| PD | `/data` | +| Kafka | `/var/lib/kafka/data` | +| Redis | `/data` | +| MongoDB | `/data/db` | + +All persistent volumes should be backed by SSD or NVMe storage for production deployments. + +## Backup strategy + +- TiDB: daily backups to secure, off-cluster storage +- Kafka: weekly segment-level backups +- Redis: RDB snapshots every 6 hours (cache data is non-authoritative) +- Backup validation: monthly restore and integrity verification tests + +## Disaster recovery + +- Validate full restore procedures at least once per quarter +- Maintain a minimum of three geographically isolated backup copies +- Run staged disaster recovery simulations such as warm-standby restoration and full cluster rehydration from backups diff --git a/fundamentals/cometchat-on-prem/docker/prerequisites.mdx b/fundamentals/cometchat-on-prem/docker/prerequisites.mdx new file mode 100644 index 00000000..b75a4fbc --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/prerequisites.mdx @@ -0,0 +1,53 @@ +--- +title: "Prerequisites" +sidebarTitle: "Prerequisites" +--- + +## Supported operating systems + +- Ubuntu 20.04 / 22.04 / 24.04 LTS +- Red Hat Enterprise Linux 8+ + +## Required software + +- Docker Engine >= 24 +- Docker Compose >= v2 +- Git +- OpenSSL >= 1.1 +- jq, curl, net-tools + +## Minimum hardware (testing / QA) + +- 8 vCPUs +- 16 GB RAM +- 100 GB SSD (minimum; scale up based on workload and storage needs) + +## Production hardware + +### Baseline sizing + +| MAU | Peak concurrent connections (PCC) | vCPUs | RAM | +| --- | --- | --- | --- | +| 10k | 500 | 32 | 64 GiB | +| 25k | 1,250 | 64 | 128 GiB | +| 50k | 2,500 | 96 | 192 GiB | +| 100k | 5,000 | 156 | 312 GiB | +| 200k | 10,000 | 272 | 544 GiB | + +Storage guidance: start at 100 GB SSD and scale to between 500 GB and 2 TB of SSD depending on workload and data retention.
+ +### High-concurrency sizing + +| MAU | Peak concurrent connections (PCC) | vCPUs | RAM | +| --- | --- | --- | --- | +| 10k | 1,000 | 48 | 96 GiB | +| 25k | 2,500 | 96 | 192 GiB | +| 50k | 5,000 | 156 | 312 GiB | +| 100k | 10,000 | 240 | 480 GiB | +| 200k | 20,000 | 480 | 960 GiB | + +Storage guidance: expect to exceed 100 GB SSD; plan 500 GB to 2 TB SSD as concurrency and data volume grow. + +## Required ports + +- 80 / 443 to NGINX (HTTP / HTTPS) diff --git a/fundamentals/cometchat-on-prem/docker/production-deployment.mdx b/fundamentals/cometchat-on-prem/docker/production-deployment.mdx new file mode 100644 index 00000000..74d5463b --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/production-deployment.mdx @@ -0,0 +1,86 @@ +--- +title: "Production Deployment" +sidebarTitle: "Production Deployment" +--- + +Deploy the full stack on Docker Swarm using the provided scripts. + +## Initialize Docker Swarm (manager node) + +```bash +docker swarm init +docker node ls +``` + +## Deploy the complete infrastructure + +From the repository root: + +```bash +./deploy.sh +``` + +The deployment script initializes required containers, creates Docker networks and volumes, deploys all Swarm services defined in `docker-compose.yml`, and starts components in the correct order. + +## Rolling updates + +```bash +./update.sh +``` + +Performs zero-downtime rolling updates, refreshes configuration, and replaces containers at the Swarm layer. + +## Production NGINX reverse proxy + +NGINX handles TLS termination, API routing, WebSocket upgrades, and proxy buffering. Update domain mappings in: + +- chatapi.conf +- extensions.conf +- mgmtapi.conf +- notifications.conf +- dashboard.conf +- globalwebhooks.conf +- moderation.conf +- websocket.conf + +## Domain configuration (production) + +When replacing ``, update environment values for Chat API, Management API, Extensions, Notifications, Moderation, Webhooks, SQL Consumer, and NGINX reverse proxy hostnames. 
Ensure the WebSocket host in Chat API aligns with the chosen domain (for example, `chat.example.com`). + +## Useful production commands (Swarm operations) + +General management: + +```bash +docker node ls +docker service ls +docker service ps SERVICE_NAME +docker service logs -f SERVICE_NAME +docker exec -it CONTAINER_ID bash +docker node update --availability drain NODE_NAME +docker service scale SERVICE_NAME=3 +``` + +Stack operations: + +```bash +docker stack deploy -c docker-compose.yml cometchat +docker stack rm cometchat +docker stack services cometchat +docker stack ps cometchat +``` + +## Health check endpoints + +| Component | URL | +| --- | --- | +| Dashboard | `https://app.example.com` | +| Chat API | `https://api-us.example.com/health-check` | +| Client API | `https://apiclient-us.example.com/health-check` | +| Management API | `https://apimgmt.example.com/health-check` | +| Notifications | `https://notifications-us.example.com/health-check` | +| Moderation | `https://rule-us.example.com/health` | +| WebSocket | `https://websocket-us.example.com/v1/health` | +| Webhooks | `https://webhooks-us.example.com/v1/webhooks/health-check` | + +Replace `example.com` with your actual domain, such as `chat.example.com`. diff --git a/fundamentals/cometchat-on-prem/docker/quick-start.mdx b/fundamentals/cometchat-on-prem/docker/quick-start.mdx new file mode 100644 index 00000000..26aeda57 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/quick-start.mdx @@ -0,0 +1,54 @@ +--- +title: "Quick Start (Local Deployment)" +sidebarTitle: "Quick Start" +--- + +Run the platform locally on a single machine with Docker Compose for development and QA. This setup is not hardened for production workloads.
+ +## Install Git + +```bash +sudo apt update -y +sudo apt install git -y +``` + +## Clone the repository + +```bash +git clone https://github.com/cometchat-team/cometchat-chat-api-infra-backend.git +cd cometchat-chat-api-infra-backend +git switch docker-swarm-setup-x86 +``` + +## Install Docker & Docker Compose (local only) + +Use the included script, then verify the installation: + +```bash +./install-docker-ubuntu.sh +docker --version +docker compose version +``` + +## Start the local Docker environment + +```bash +docker compose pull +docker compose up -d +``` + +Result: all services required for local development start on one host. + +## Validate the local deployment + +- WebSocket Gateway: `http://localhost/v1/health` +- Chat API health check: `http://localhost/health` + +## What this local setup includes + +- Kafka +- Redis +- MongoDB +- Local Docker volumes for persistence + +> Intended for development, debugging, and API testing only. Do not use this environment for production traffic. diff --git a/fundamentals/cometchat-on-prem/docker/scaling.mdx b/fundamentals/cometchat-on-prem/docker/scaling.mdx new file mode 100644 index 00000000..17db881d --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/scaling.mdx @@ -0,0 +1,60 @@ +--- +title: "Scaling" +sidebarTitle: "Scaling" +--- + +Guidelines for scaling platform components based on load and resource requirements. + +## Vertical scaling + +Increase system resource limits and tune configurations to handle more load on existing servers: + +- Raise file descriptor limits +- Tune kernel network queues (`somaxconn`, `netdev_max_backlog`) +- Increase worker processes and thread pools where supported + +### Configure file descriptor limits + +1. Edit `/etc/security/limits.conf` and add: + +``` +* soft nofile 500000 +* hard nofile 500000 +root soft nofile 500000 +root hard nofile 500000 +``` + +2.
Configure systemd defaults: + +```bash +echo "DefaultLimitNOFILE=500000" | sudo tee -a /etc/systemd/system.conf +echo "DefaultLimitNOFILE=500000" | sudo tee -a /etc/systemd/user.conf +``` + +3. Reboot to apply changes: + +```bash +sudo reboot +``` + +4. Verify: + +```bash +ulimit -n +``` + +## When to migrate to Kubernetes + +Consider Kubernetes when: + +- MAU exceeds ~200k +- You need multi-region deployments or failover +- Sub-50 ms latency targets are critical +- Dynamic autoscaling and elasticity are operational priorities (HPA/VPA) + +## Horizontal scaling guidelines + +- WebSocket Gateway: add ~1 replica per 1,000-1,500 peak concurrent connections (PCC) +- Chat API: scale out when average CPU utilization exceeds ~60% +- Kafka: increase partition count to improve throughput and parallelism +- Redis: enable Redis Cluster mode when deployments exceed ~200k MAU to distribute data and improve scalability diff --git a/fundamentals/cometchat-on-prem/docker/security.mdx b/fundamentals/cometchat-on-prem/docker/security.mdx new file mode 100644 index 00000000..03f0bb03 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/security.mdx @@ -0,0 +1,86 @@ +--- +title: "Security" +sidebarTitle: "Security" +--- + +Security controls focus on authentication, secrets management, network isolation, TLS posture, and protective controls against abuse. + +## Authentication + +Use JWT-based authentication with RSA key pairs so only authorized users interact with the platform. Single sign-on via OIDC or SAML 2.0 can be layered on when needed. + +Actionable steps: +- Generate an RSA key pair (example): + +```bash +openssl genpkey -algorithm RSA -out private.key -pkeyopt rsa_keygen_bits:2048 +openssl rsa -pubout -in private.key -out public.key +``` + +- Validate JWTs in your backend with the public key. +- Rotate signing keys every 30-90 days for long-running deployments. + +## Secrets management + +Centralize and encrypt sensitive data such as passwords, API keys, and tokens.
+ +- HashiCorp Vault for RBAC, audit logs, and encrypted storage. + - Store a secret: `vault kv put secret/dbpassword value="your-secure-password"` + - Retrieve secrets via Vault APIs or client libraries. +- Docker Swarm secrets for injecting secrets into services, encrypted in transit and at rest. + - Create and use a secret: + +```bash +echo "your-secure-password" | docker secret create db_password - +``` + +Include the secret in your compose or service definitions. + +## Network security + +Run backend services on private overlay networks and expose only NGINX to the internet. + +Actionable steps: +- Create a private overlay network: + +```bash +docker network create --driver overlay private_network +``` + +- Harden firewall rules to expose only the necessary public ports (80/443) and block the rest. Before enabling the firewall, allow SSH from your admin network and keep the Docker Swarm inter-node ports (2377/tcp, 7946/tcp+udp, 4789/udp) open between cluster nodes, otherwise you can lock yourself out of a remote host or break the Swarm. For example: + +```bash +sudo ufw allow 80,443/tcp +sudo ufw default deny incoming +sudo ufw enable +``` + +## TLS configuration + +Encrypt all traffic with TLS 1.2 or higher and plan for seamless certificate rotation. + +- Enforce strong TLS in NGINX: + +```nginx +ssl_protocols TLSv1.2 TLSv1.3; +ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:...'; +``` + +- Maintain at least two certificates to rotate without downtime. + +## Additional security measures + +- Rate limiting: protect against abuse or DDoS using NGINX rate limits: + +```nginx +limit_req_zone $binary_remote_addr zone=mylimit:10m rate=10r/s; +limit_req zone=mylimit burst=20; +``` + +- IP allowlisting: restrict access to sensitive services (the example uses MySQL's default port 3306; substitute the port your database listens on, for example 4000 for a default TiDB SQL endpoint): + +```bash +sudo ufw allow from 192.168.1.0/24 to any port 3306 +``` + +- Log monitoring: collect and monitor logs (e.g., Prometheus alerts, Grafana dashboards, ELK/Loki) to detect suspicious activity.
diff --git a/fundamentals/cometchat-on-prem/docker/troubleshooting.mdx b/fundamentals/cometchat-on-prem/docker/troubleshooting.mdx new file mode 100644 index 00000000..28317f79 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/troubleshooting.mdx @@ -0,0 +1,50 @@ +--- +title: "Troubleshooting" +sidebarTitle: "Troubleshooting" +--- + +Common operational issues and debugging guidance. + +## Common problems and likely causes + +### 502 errors +- Possible cause: Chat API unreachable or unhealthy behind NGINX. +- Resolution: + - Ensure the Chat API service is running: `docker service ps SERVICE_NAME` + - Check NGINX logs and upstream configuration to verify routing and upstream health. + +### Kafka lag +- Possible cause: Consumer slowdown or insufficient partition count. +- Resolution: + - Check Kafka consumer lag: `kafka-consumer-groups --describe --group GROUP_ID --bootstrap-server BROKER_HOST:PORT` + - Increase partitions if needed: `kafka-topics --alter --partitions PARTITION_COUNT --topic TOPIC_NAME --bootstrap-server BROKER_HOST:PORT` + +### Redis eviction +- Possible cause: Memory pressure or incorrect eviction policy. +- Resolution: + - Inspect memory settings: `redis-cli config get maxmemory` and `redis-cli config get maxmemory-policy` + - Set an eviction policy such as `redis-cli config set maxmemory-policy allkeys-lru` + +### TiKV region errors +- Possible cause: Disk latency, resource contention, or store imbalance.
+- Resolution: + - Check TiKV store status: `tiup cluster display CLUSTER_NAME` + - If stores remain unhealthy, force-restart the cluster as a last resort (a restart does not itself rebalance regions; PD reschedules them once stores are healthy): `tiup cluster restart CLUSTER_NAME --force` + +## Debugging commands + +### Container and Swarm diagnostics +- View container logs: `docker logs CONTAINER_ID` +- Check service status: `docker service ps SERVICE_NAME` +- Inspect container details: `docker inspect CONTAINER_ID` + +### TiDB cluster status +- Display cluster status with TiUP: `tiup cluster display CLUSTER_NAME` + +## Conclusion + +This Platform Deployment & Operations Guide provides comprehensive guidance for deploying, operating, and scaling a high-performance real-time messaging platform, from local development environments to enterprise-scale production deployments. It addresses deployment strategies, configuration management, security controls, monitoring, scaling, upgrades, and troubleshooting. + +Document Version: 1.0 +Last Updated: January 2026 +Prepared by: DevOps Team diff --git a/fundamentals/cometchat-on-prem/docker/upgrades.mdx b/fundamentals/cometchat-on-prem/docker/upgrades.mdx new file mode 100644 index 00000000..971d8489 --- /dev/null +++ b/fundamentals/cometchat-on-prem/docker/upgrades.mdx @@ -0,0 +1,24 @@ +--- +title: "Upgrades" +sidebarTitle: "Upgrades" +--- + +Recommended upgrade strategy to ensure zero downtime and safe production rollouts.
+ +## Rolling updates + +- Deploy new service replicas alongside existing ones +- Gradually shift traffic to the updated replicas +- Retire older replicas only after health checks pass + +## Database migrations + +- Always test migrations in a staging environment before production +- Prefer backward-compatible schema changes +- Avoid dropping or renaming columns while serving live traffic + +## Rollbacks + +- Retain previous Docker images to allow quick rollback +- Revert database changes using rollback or down migration scripts when required +- Validate application behavior and data integrity before restoring full traffic diff --git a/fundamentals/cometchat-on-prem/kubernetes/overview.mdx b/fundamentals/cometchat-on-prem/kubernetes/overview.mdx new file mode 100644 index 00000000..174d789e --- /dev/null +++ b/fundamentals/cometchat-on-prem/kubernetes/overview.mdx @@ -0,0 +1,6 @@ +--- +title: "CometChat On-Prem Overview" +sidebarTitle: "Overview" +--- + +Kubernetes deployment documentation is not yet published. Contact us for enterprise Kubernetes architecture guidance. diff --git a/images/docker-on-prem-architecture.png b/images/docker-on-prem-architecture.png new file mode 100644 index 00000000..8f168a45 Binary files /dev/null and b/images/docker-on-prem-architecture.png differ diff --git a/images/icons/docker.svg b/images/icons/docker.svg new file mode 100644 index 00000000..eba6cc41 --- /dev/null +++ b/images/icons/docker.svg @@ -0,0 +1,12 @@ + + + + + + + \ No newline at end of file diff --git a/images/icons/kubernetes.svg b/images/icons/kubernetes.svg new file mode 100644 index 00000000..bedd3b88 --- /dev/null +++ b/images/icons/kubernetes.svg @@ -0,0 +1,84 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + +