From 1f5509fe0c8dada44290e98502321e69ea97866c Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Thu, 23 Apr 2026 11:24:11 +0200 Subject: [PATCH 01/15] fix: small change for testing updater --- control-plane/api-gateway/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/control-plane/api-gateway/src/main.rs b/control-plane/api-gateway/src/main.rs index 113fdb9..25530fd 100644 --- a/control-plane/api-gateway/src/main.rs +++ b/control-plane/api-gateway/src/main.rs @@ -162,7 +162,7 @@ async fn main() { .with_state(state); let addr = SocketAddr::from(([0, 0, 0, 0], 8000)); - tracing::info!(addr = %addr, "listening"); + tracing::info!(version = env!("CARGO_PKG_VERSION"), addr = %addr, "listening"); let listener = tokio::net::TcpListener::bind(addr).await.unwrap(); axum::serve( listener, From 31d3c29e80c596dc2395bdb9b178f1b3c8d527c6 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Thu, 23 Apr 2026 12:18:30 +0200 Subject: [PATCH 02/15] fix: handle 429 rate limit in agent registration with retry backoff --- agent/src/client.rs | 48 ++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/agent/src/client.rs b/agent/src/client.rs index ec33ebc..bedce01 100644 --- a/agent/src/client.rs +++ b/agent/src/client.rs @@ -2,6 +2,7 @@ use anyhow::{Context, Result}; use reqwest::Client; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use std::time::Duration; use uuid::Uuid; #[derive(Debug, Deserialize, Clone)] @@ -124,23 +125,38 @@ impl ApiClient { csr_pem, }; - let resp = self - .client - .post(&url) - .json(&body) - .send() - .await - .context("Failed to send registration request")?; - - if !resp.status().is_success() { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - anyhow::bail!("Registration failed status={} body={}", status, body); + loop { + let resp = self + .client + .post(&url) + .json(&body) + .send() + .await + .context("Failed to send registration request")?; + + if resp.status() == reqwest::StatusCode::TOO_MANY_REQUESTS { + let retry_after = resp + .headers() + .get("Retry-After") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()) + .unwrap_or(60); + tracing::warn!(retry_after, "Registration rate-limited, retrying"); + tokio::time::sleep(Duration::from_secs(retry_after)).await; + continue; + } + + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + anyhow::bail!("Registration failed status={} body={}", status, body); + } + + return resp + .json::() + .await + .context("Failed to parse registration response"); } - - resp.json::() - .await - .context("Failed to parse registration response") } pub async fn heartbeat( From 1d86d69e1afe067bb5fdb6507cc0e202ecc68681 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 4 May 2026 21:33:02 +0200 Subject: [PATCH 03/15] fix(agent): detect OS from /etc/os-release instead of sysinfo --- agent/src/system.rs | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/agent/src/system.rs b/agent/src/system.rs index be23d85..e4679bd 100644 --- a/agent/src/system.rs +++ b/agent/src/system.rs @@ -19,11 +19,46 @@ pub struct SystemMetrics { pub uptime_seconds: u64, } +fn parse_os_release_field(content: &str, field: &str) -> Option { + content + .lines() + .find(|l| l.starts_with(field)) + .and_then(|l| l.splitn(2, '=').nth(1)) + .map(|v| v.trim_matches('"').to_string()) +} + +fn detect_os() -> (String, String) { + if let Ok(os_type) = std::env::var("CSFX_OS_TYPE") { + let os_version = std::env::var("CSFX_OS_VERSION") + .unwrap_or_else(|_| System::os_version().unwrap_or_else(|| "unknown".to_string())); + return (os_type.to_lowercase(), os_version); + } + + if let Ok(content) = std::fs::read_to_string("/etc/os-release") { + let id = parse_os_release_field(&content, "ID"); + let version = parse_os_release_field(&content, "VERSION_ID") + .or_else(|| parse_os_release_field(&content, "BUILD_ID")); + + if let Some(os_type) = id { + let os_version = version.unwrap_or_else(|| { + System::os_version().unwrap_or_else(|| "unknown".to_string()) + }); + return (os_type.to_lowercase(), os_version); + } + } + + ( + System::name().unwrap_or_else(|| "linux".to_string()).to_lowercase(), + System::os_version().unwrap_or_else(|| "unknown".to_string()), + ) +} + pub fn collect_info() -> SystemInfo { + let (os_type, os_version) = detect_os(); SystemInfo { hostname: System::host_name().unwrap_or_else(|| "unknown".to_string()), - os_type: System::name().unwrap_or_else(|| "linux".to_string()).to_lowercase(), - os_version: System::os_version().unwrap_or_else(|| "unknown".to_string()), + os_type, + os_version, architecture: std::env::consts::ARCH.to_string(), } } From dea0988470673166002bf8a10f48fe5525599369 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Wed, 6 May 2026 21:52:41 +0200 Subject: [PATCH 04/15] ci: replace CP docker builds with musl binary pipeline --- .github/workflows/docker-build.yml | 212 +++++++++++++++++++++---- .github/workflows/prerelease.yml | 242 +++++++++++++++++++++++------ 2 files changed, 372 insertions(+), 82 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 67d27e0..c94c6b9 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -69,12 +69,6 @@ jobs: fail-fast: false matrix: service: - - api-gateway - - registry - - scheduler - - volume-manager - - failover-controller - - sdn-controller - patroni arch: - amd64 @@ -212,20 +206,84 @@ jobs: ${{ matrix.binary }}-${{ matrix.arch }}.sha256 retention-days: 7 - manifest: - name: Manifest ${{ matrix.service }} - runs-on: ubuntu-latest - needs: [prepare, build] + build-cp-binaries: + name: Build CP ${{ matrix.binary }} (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + needs: prepare + if: needs.prepare.outputs.should_build == 'true' strategy: fail-fast: false matrix: - service: + binary: - api-gateway - registry - scheduler - volume-manager - failover-controller - sdn-controller + - csfx-migrate + arch: + - amd64 + - arm64 + include: + - arch: amd64 + runner: ubuntu-latest + target: x86_64-unknown-linux-musl + - arch: arm64 + runner: ubuntu-24.04-arm + target: aarch64-unknown-linux-musl + steps: + - uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get update + if [ "${{ matrix.arch }}" = "amd64" ]; then + sudo apt-get install -y musl-tools protobuf-compiler libpq-dev + else + sudo apt-get install -y gcc-aarch64-linux-gnu musl-tools protobuf-compiler libpq-dev + fi + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: cp-${{ matrix.binary }}-${{ matrix.arch }}-${{ hashFiles('**/Cargo.lock') }} + + - name: Build + env: + CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER: musl-gcc + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER: aarch64-linux-gnu-gcc + CSFX_BUILD_VERSION: ${{ needs.prepare.outputs.version }} + run: | + cargo build --release --bin ${{ matrix.binary }} --target ${{ matrix.target }} + cp target/${{ matrix.target }}/release/${{ matrix.binary }} csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + sha256sum csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} > csfx-cp-${{ matrix.binary }}-${{ matrix.arch }}.sha256 + + - uses: actions/upload-artifact@v4 + with: + name: csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + path: | + csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + csfx-cp-${{ matrix.binary }}-${{ matrix.arch }}.sha256 + retention-days: 7 + + manifest: + name: Manifest ${{ matrix.service }} + runs-on: ubuntu-latest + needs: [prepare, build] + strategy: + fail-fast: false + matrix: + service: - patroni steps: - name: Set image name @@ -272,7 +330,7 @@ jobs: attach-binaries-release: name: Attach binaries to release runs-on: ubuntu-latest - needs: [prepare, build-binaries] + needs: [prepare, build-binaries, build-cp-binaries] steps: - uses: actions/checkout@v4 @@ -285,6 +343,14 @@ jobs: run: | VERSION="${{ needs.prepare.outputs.version }}" TAG="v${VERSION}" + + CP_BINS="" + for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller csfx-migrate; do + for arch in amd64 arm64; do + CP_BINS="${CP_BINS} csfx-cp-${svc}-${arch} csfx-cp-${svc}-${arch}.sha256" + done + done + if gh release view "${TAG}" &>/dev/null; then gh release upload "${TAG}" \ csfx-updater-amd64 \ @@ -295,6 +361,7 @@ jobs: csfx-agent-amd64.sha256 \ csfx-agent-arm64 \ csfx-agent-arm64.sha256 \ + ${CP_BINS} \ --clobber else gh release create "${TAG}" \ @@ -308,7 +375,8 @@ jobs: csfx-agent-amd64 \ csfx-agent-amd64.sha256 \ csfx-agent-arm64 \ - csfx-agent-arm64.sha256 + csfx-agent-arm64.sha256 \ + ${CP_BINS} fi env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -316,7 +384,7 @@ jobs: update-infra: name: Update CSFX-Infra versions.nix runs-on: ubuntu-latest - needs: [prepare, manifest, build-binaries, attach-binaries-release] + needs: [prepare, manifest, build-binaries, build-cp-binaries, attach-binaries-release] if: needs.prepare.outputs.is_release == 'true' steps: - uses: actions/checkout@v4 @@ -337,6 +405,12 @@ jobs: path: /tmp/binaries merge-multiple: true + - uses: actions/download-artifact@v4 + with: + pattern: csfx-cp-* + path: /tmp/cp-binaries + merge-multiple: true + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -362,9 +436,13 @@ jobs: } get_sha256() { - local binary=$1 - local arch=$2 - awk '{print $1}' /tmp/binaries/${binary}-${arch}.sha256 2>/dev/null + local file=$1 + awk '{print $1}' "/tmp/binaries/${file}.sha256" 2>/dev/null + } + + get_cp_sha256() { + local file=$1 + awk '{print $1}' "/tmp/cp-binaries/${file}.sha256" 2>/dev/null } cat > infra/versions.nix <> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "### Docker Images" >> $GITHUB_STEP_SUMMARY + echo "### Docker Images (patroni only)" >> $GITHUB_STEP_SUMMARY echo "| Service | Image |" >> $GITHUB_STEP_SUMMARY echo "|---------|-------|" >> $GITHUB_STEP_SUMMARY - for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller; do - echo "| ${svc} | \`ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}\` |" >> $GITHUB_STEP_SUMMARY + echo "| patroni | \`ghcr.io/${ORG}/csfx-ce-patroni:${VERSION}\` |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Control Plane Binaries" >> $GITHUB_STEP_SUMMARY + echo "| Binary | Arch | Artifact |" >> $GITHUB_STEP_SUMMARY + echo "|--------|------|----------|" >> $GITHUB_STEP_SUMMARY + for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller csfx-migrate; do + echo "| ${svc} | amd64 | \`csfx-cp-${svc}-amd64\` |" >> $GITHUB_STEP_SUMMARY + echo "| ${svc} | arm64 | \`csfx-cp-${svc}-arm64\` |" >> $GITHUB_STEP_SUMMARY done echo "" >> $GITHUB_STEP_SUMMARY - echo "### Binaries" >> $GITHUB_STEP_SUMMARY + echo "### Agent Binaries" >> $GITHUB_STEP_SUMMARY echo "| Binary | Arch | Artifact |" >> $GITHUB_STEP_SUMMARY echo "|--------|------|----------|" >> $GITHUB_STEP_SUMMARY for bin in csfx-updater csfx-agent; do diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index f3df0ab..6080cfa 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -43,20 +43,13 @@ jobs: echo "version=${BASE}-alpha.${COUNT}" >> $GITHUB_OUTPUT fi - build: - name: Build ${{ matrix.service }} (${{ matrix.arch }}) + build-patroni: + name: Build patroni (${{ matrix.arch }}) runs-on: ${{ matrix.runner }} needs: version strategy: fail-fast: false matrix: - service: - - api-gateway - - registry - - scheduler - - volume-manager - - failover-controller - - sdn-controller arch: - amd64 - arm64 @@ -74,7 +67,7 @@ jobs: id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-patroni" >> $GITHUB_OUTPUT - uses: docker/setup-buildx-action@v3 @@ -88,53 +81,40 @@ jobs: id: build uses: docker/build-push-action@v6 with: - context: . - file: control-plane/Dockerfile.prod.shared - build-args: | - SERVICE_BIN=${{ matrix.service }} - BUILD_JOBS=2 + context: deployments/docker/patroni + file: deployments/docker/patroni/Dockerfile push: true outputs: type=registry,name=${{ steps.image.outputs.name }},push-by-digest=true platforms: ${{ matrix.platform }} provenance: false - cache-from: type=gha,scope=pre-${{ matrix.service }}-${{ matrix.arch }} - cache-to: type=gha,mode=max,scope=pre-${{ matrix.service }}-${{ matrix.arch }} + cache-from: type=gha,scope=pre-patroni-${{ matrix.arch }} + cache-to: type=gha,mode=max,scope=pre-patroni-${{ matrix.arch }} - name: Save digest run: | mkdir -p /tmp/digests - echo "${{ steps.build.outputs.digest }}" > /tmp/digests/${{ matrix.service }}-${{ matrix.arch }}.txt + echo "${{ steps.build.outputs.digest }}" > /tmp/digests/patroni-${{ matrix.arch }}.txt - uses: actions/upload-artifact@v4 with: - name: digest-${{ matrix.service }}-${{ matrix.arch }} - path: /tmp/digests/${{ matrix.service }}-${{ matrix.arch }}.txt + name: digest-patroni-${{ matrix.arch }} + path: /tmp/digests/patroni-${{ matrix.arch }}.txt retention-days: 1 - manifest: - name: Manifest ${{ matrix.service }} + manifest-patroni: + name: Manifest patroni runs-on: ubuntu-latest - needs: [version, build] - strategy: - fail-fast: false - matrix: - service: - - api-gateway - - registry - - scheduler - - volume-manager - - failover-controller - - sdn-controller + needs: [version, build-patroni] steps: - name: Set image name id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-patroni" >> $GITHUB_OUTPUT - uses: actions/download-artifact@v4 with: - pattern: digest-${{ matrix.service }}-* + pattern: digest-patroni-* path: /tmp/digests merge-multiple: true @@ -150,8 +130,8 @@ jobs: run: | VERSION="${{ needs.version.outputs.version }}" IMAGE="${{ steps.image.outputs.name }}" - AMD64=$(cat /tmp/digests/${{ matrix.service }}-amd64.txt) - ARM64=$(cat /tmp/digests/${{ matrix.service }}-arm64.txt) + AMD64=$(tr -d '[:space:]' < /tmp/digests/patroni-amd64.txt) + ARM64=$(tr -d '[:space:]' < /tmp/digests/patroni-arm64.txt) docker buildx imagetools create \ -t ${IMAGE}:${VERSION} \ ${IMAGE}@${AMD64} \ @@ -221,10 +201,79 @@ jobs: ${{ matrix.binary }}-${{ matrix.arch }}.sha256 retention-days: 7 + build-cp-binaries: + name: Build CP ${{ matrix.binary }} (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + needs: version + strategy: + fail-fast: false + matrix: + binary: + - api-gateway + - registry + - scheduler + - volume-manager + - failover-controller + - sdn-controller + - csfx-migrate + arch: + - amd64 + - arm64 + include: + - arch: amd64 + runner: ubuntu-latest + target: x86_64-unknown-linux-musl + - arch: arm64 + runner: ubuntu-24.04-arm + target: aarch64-unknown-linux-musl + steps: + - uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get update + if [ "${{ matrix.arch }}" = "amd64" ]; then + sudo apt-get install -y musl-tools protobuf-compiler libpq-dev + else + sudo apt-get install -y gcc-aarch64-linux-gnu musl-tools protobuf-compiler libpq-dev + fi + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Cache cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: cp-${{ matrix.binary }}-${{ matrix.arch }}-${{ hashFiles('**/Cargo.lock') }} + + - name: Build + env: + CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER: musl-gcc + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER: aarch64-linux-gnu-gcc + CSFX_BUILD_VERSION: ${{ needs.version.outputs.version }} + run: | + cargo build --release --bin ${{ matrix.binary }} --target ${{ matrix.target }} + cp target/${{ matrix.target }}/release/${{ matrix.binary }} csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + sha256sum csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} > csfx-cp-${{ matrix.binary }}-${{ matrix.arch }}.sha256 + + - uses: actions/upload-artifact@v4 + with: + name: csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + path: | + csfx-cp-${{ matrix.binary }}-${{ matrix.arch }} + csfx-cp-${{ matrix.binary }}-${{ matrix.arch }}.sha256 + retention-days: 7 + github-release: name: GitHub Pre-release runs-on: ubuntu-latest - needs: [version, manifest, build-binaries] + needs: [version, manifest-patroni, build-binaries, build-cp-binaries] steps: - uses: actions/checkout@v4 @@ -236,6 +285,13 @@ jobs: - name: Create pre-release run: | VERSION="${{ needs.version.outputs.version }}" + CP_BINS="" + for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller csfx-migrate; do + for arch in amd64 arm64; do + CP_BINS="${CP_BINS} csfx-cp-${svc}-${arch} csfx-cp-${svc}-${arch}.sha256" + done + done + gh release create "v${VERSION}" \ --title "v${VERSION}" \ --prerelease \ @@ -247,14 +303,15 @@ jobs: csfx-agent-amd64 \ csfx-agent-amd64.sha256 \ csfx-agent-arm64 \ - csfx-agent-arm64.sha256 + csfx-agent-arm64.sha256 \ + ${CP_BINS} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} update-infra: name: Update CSFX-Infra versions.nix runs-on: ubuntu-latest - needs: [version, manifest, build-binaries, github-release] + needs: [version, manifest-patroni, build-binaries, build-cp-binaries, github-release] steps: - uses: actions/checkout@v4 with: @@ -269,6 +326,18 @@ jobs: path: /tmp/binaries merge-multiple: true + - uses: actions/download-artifact@v4 + with: + pattern: csfx-cp-* + path: /tmp/cp-binaries + merge-multiple: true + + - uses: actions/download-artifact@v4 + with: + pattern: digest-patroni-* + path: /tmp/digests + merge-multiple: true + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -294,9 +363,13 @@ jobs: } get_sha256() { - local binary=$1 - local arch=$2 - awk '{print $1}' /tmp/binaries/${binary}-${arch}.sha256 2>/dev/null + local file=$1 + awk '{print $1}' "/tmp/binaries/${file}.sha256" 2>/dev/null + } + + get_cp_sha256() { + local file=$1 + awk '{print $1}' "/tmp/cp-binaries/${file}.sha256" 2>/dev/null } cat > infra/versions.nix < Date: Thu, 7 May 2026 17:56:39 +0200 Subject: [PATCH 05/15] chore(ci): remove patroni docker build jobs from workflows --- .github/workflows/docker-build.yml | 251 +----------------- .github/workflows/prerelease.yml | 125 +-------- control-plane/api-gateway/src/main.rs | 7 +- control-plane/failover-controller/src/main.rs | 3 +- control-plane/registry/src/main.rs | 3 +- control-plane/scheduler/src/main.rs | 3 +- control-plane/sdn-controller/src/main.rs | 3 +- control-plane/volume-manager/src/main.rs | 3 +- 8 files changed, 32 insertions(+), 366 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index c94c6b9..531e181 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -1,4 +1,4 @@ -name: Docker Build & Push +name: Release Build & Push on: workflow_run: @@ -10,9 +10,6 @@ on: description: "Version tag (e.g. 1.2.3)" required: true type: string - push: - branches: - - develop permissions: contents: write @@ -24,12 +21,10 @@ jobs: runs-on: ubuntu-latest if: > github.event_name == 'workflow_dispatch' || - github.event_name == 'push' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') outputs: version: ${{ steps.version.outputs.version }} should_build: ${{ steps.version.outputs.should_build }} - is_release: ${{ steps.version.outputs.is_release }} steps: - uses: actions/checkout@v4 @@ -39,108 +34,18 @@ jobs: if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT echo "should_build=true" >> $GITHUB_OUTPUT - echo "is_release=true" >> $GITHUB_OUTPUT - elif [ "${{ github.event_name }}" = "push" ]; then - VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)".*/\1/') - RUN_NUM="${{ github.run_number }}" - echo "version=${VERSION}-alpha.${RUN_NUM}" >> $GITHUB_OUTPUT - echo "should_build=true" >> $GITHUB_OUTPUT - echo "is_release=false" >> $GITHUB_OUTPUT else TAG=$(gh release list --limit 1 --json tagName -q '.[0].tagName' 2>/dev/null || echo "") if [ -z "$TAG" ]; then echo "should_build=false" >> $GITHUB_OUTPUT - echo "is_release=false" >> $GITHUB_OUTPUT else echo "version=${TAG#v}" >> $GITHUB_OUTPUT echo "should_build=true" >> $GITHUB_OUTPUT - echo "is_release=true" >> $GITHUB_OUTPUT fi fi env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - build: - name: Build ${{ matrix.service }} (${{ matrix.arch }}) - runs-on: ${{ matrix.runner }} - needs: prepare - if: needs.prepare.outputs.should_build == 'true' - strategy: - fail-fast: false - matrix: - service: - - patroni - arch: - - amd64 - - arm64 - include: - - arch: amd64 - runner: ubuntu-latest - platform: linux/amd64 - - arch: arm64 - runner: ubuntu-24.04-arm - platform: linux/arm64 - - service: patroni - dockerfile: deployments/docker/patroni/Dockerfile - context: deployments/docker/patroni - steps: - - uses: actions/checkout@v4 - - - name: Set image name - id: image - run: | - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - - - name: Resolve Dockerfile and context - id: dockerctx - run: | - if [ -n "${{ matrix.dockerfile }}" ]; then - echo "file=${{ matrix.dockerfile }}" >> $GITHUB_OUTPUT - echo "context=${{ matrix.context }}" >> $GITHUB_OUTPUT - echo "build_args=" >> $GITHUB_OUTPUT - else - echo "file=control-plane/Dockerfile.prod.shared" >> $GITHUB_OUTPUT - echo "context=." >> $GITHUB_OUTPUT - printf 'build_args=SERVICE_BIN=%s\nBUILD_JOBS=2\nCSFX_BUILD_VERSION=%s\n' \ - "${{ matrix.service }}" "${{ needs.prepare.outputs.version }}" >> $GITHUB_OUTPUT - fi - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - context: ${{ steps.dockerctx.outputs.context }} - file: ${{ steps.dockerctx.outputs.file }} - build-args: ${{ steps.dockerctx.outputs.build_args }} - push: true - outputs: type=registry,name=${{ steps.image.outputs.name }},push-by-digest=true - platforms: ${{ matrix.platform }} - provenance: false - cache-from: type=gha,scope=${{ matrix.service }}-${{ matrix.arch }} - cache-to: type=gha,mode=max,scope=${{ matrix.service }}-${{ matrix.arch }} - - - name: Save digest - run: | - mkdir -p /tmp/digests - echo "${{ steps.build.outputs.digest }}" > /tmp/digests/${{ matrix.service }}-${{ matrix.arch }}.txt - - - uses: actions/upload-artifact@v4 - with: - name: digest-${{ matrix.service }}-${{ matrix.arch }} - path: /tmp/digests/${{ matrix.service }}-${{ matrix.arch }}.txt - retention-days: 1 - build-binaries: name: Build ${{ matrix.binary }} (${{ matrix.arch }}) runs-on: ${{ matrix.runner }} @@ -276,57 +181,6 @@ jobs: csfx-cp-${{ matrix.binary }}-${{ matrix.arch }}.sha256 retention-days: 7 - manifest: - name: Manifest ${{ matrix.service }} - runs-on: ubuntu-latest - needs: [prepare, build] - strategy: - fail-fast: false - matrix: - service: - - patroni - steps: - - name: Set image name - id: image - run: | - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - - - uses: actions/download-artifact@v4 - with: - pattern: digest-${{ matrix.service }}-* - path: /tmp/digests - merge-multiple: true - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Create and push manifest - run: | - VERSION="${{ needs.prepare.outputs.version }}" - IMAGE="${{ steps.image.outputs.name }}" - - if [ -z "${VERSION}" ]; then - echo "VERSION is empty, aborting" - exit 1 - fi - - AMD64=$(tr -d '[:space:]' < /tmp/digests/${{ matrix.service }}-amd64.txt) - ARM64=$(tr -d '[:space:]' < /tmp/digests/${{ matrix.service }}-arm64.txt) - - docker buildx imagetools create \ - -t ${IMAGE}:${VERSION} \ - -t ${IMAGE}:latest \ - ${IMAGE}@${AMD64} \ - ${IMAGE}@${ARM64} - attach-binaries-release: name: Attach binaries to release runs-on: ubuntu-latest @@ -339,7 +193,7 @@ jobs: pattern: csfx-* merge-multiple: true - - name: Create or update release + - name: Upload to release run: | VERSION="${{ needs.prepare.outputs.version }}" TAG="v${VERSION}" @@ -351,41 +205,24 @@ jobs: done done - if gh release view "${TAG}" &>/dev/null; then - gh release upload "${TAG}" \ - csfx-updater-amd64 \ - csfx-updater-amd64.sha256 \ - csfx-updater-arm64 \ - csfx-updater-arm64.sha256 \ - csfx-agent-amd64 \ - csfx-agent-amd64.sha256 \ - csfx-agent-arm64 \ - csfx-agent-arm64.sha256 \ - ${CP_BINS} \ - --clobber - else - gh release create "${TAG}" \ - --title "v${VERSION}" \ - --prerelease \ - --notes "Alpha build ${VERSION}" \ - csfx-updater-amd64 \ - csfx-updater-amd64.sha256 \ - csfx-updater-arm64 \ - csfx-updater-arm64.sha256 \ - csfx-agent-amd64 \ - csfx-agent-amd64.sha256 \ - csfx-agent-arm64 \ - csfx-agent-arm64.sha256 \ - ${CP_BINS} - fi + gh release upload "${TAG}" \ + csfx-updater-amd64 \ + csfx-updater-amd64.sha256 \ + csfx-updater-arm64 \ + csfx-updater-arm64.sha256 \ + csfx-agent-amd64 \ + csfx-agent-amd64.sha256 \ + csfx-agent-arm64 \ + csfx-agent-arm64.sha256 \ + ${CP_BINS} \ + --clobber env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} update-infra: name: Update CSFX-Infra versions.nix runs-on: ubuntu-latest - needs: [prepare, manifest, build-binaries, build-cp-binaries, attach-binaries-release] - if: needs.prepare.outputs.is_release == 'true' + needs: [prepare, build-binaries, build-cp-binaries, attach-binaries-release] steps: - uses: actions/checkout@v4 with: @@ -393,12 +230,6 @@ jobs: token: ${{ secrets.INFRA_REPO_TOKEN }} path: infra - - uses: actions/download-artifact@v4 - with: - pattern: digest-* - path: /tmp/digests - merge-multiple: true - - uses: actions/download-artifact@v4 with: pattern: csfx-agent-* @@ -411,30 +242,12 @@ jobs: path: /tmp/cp-binaries merge-multiple: true - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Write versions.nix run: | VERSION="${{ needs.prepare.outputs.version }}" - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') REPO="${{ github.repository }}" RELEASE_BASE="https://github.com/${REPO}/releases/download/v${VERSION}" - get_manifest_digest() { - local svc=$1 - local image="ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}" - docker buildx imagetools inspect "${image}" \ - --format '{{json .Manifest}}' | jq -r '.digest' - } - get_sha256() { local file=$1 awk '{print $1}' "/tmp/binaries/${file}.sha256" 2>/dev/null @@ -449,9 +262,6 @@ jobs: { csfx = { version = "${VERSION}"; - images = { - patroni = { digest = "$(get_manifest_digest patroni)"; }; - }; agent = { amd64 = { url = "${RELEASE_BASE}/csfx-agent-amd64"; @@ -550,36 +360,3 @@ jobs: git push origin main git tag "v${VERSION}" git push origin "v${VERSION}" - - summary: - name: Summary - runs-on: ubuntu-latest - needs: [prepare, manifest, build-binaries, build-cp-binaries] - if: always() - steps: - - name: Write summary - run: | - VERSION="${{ needs.prepare.outputs.version }}" - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "## Build — v${VERSION}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Docker Images (patroni only)" >> $GITHUB_STEP_SUMMARY - echo "| Service | Image |" >> $GITHUB_STEP_SUMMARY - echo "|---------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| patroni | \`ghcr.io/${ORG}/csfx-ce-patroni:${VERSION}\` |" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Control Plane Binaries" >> $GITHUB_STEP_SUMMARY - echo "| Binary | Arch | Artifact |" >> $GITHUB_STEP_SUMMARY - echo "|--------|------|----------|" >> $GITHUB_STEP_SUMMARY - for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller csfx-migrate; do - echo "| ${svc} | amd64 | \`csfx-cp-${svc}-amd64\` |" >> $GITHUB_STEP_SUMMARY - echo "| ${svc} | arm64 | \`csfx-cp-${svc}-arm64\` |" >> $GITHUB_STEP_SUMMARY - done - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Agent Binaries" >> $GITHUB_STEP_SUMMARY - echo "| Binary | Arch | Artifact |" >> $GITHUB_STEP_SUMMARY - echo "|--------|------|----------|" >> $GITHUB_STEP_SUMMARY - for bin in csfx-updater csfx-agent; do - echo "| ${bin} | amd64 | \`${bin}-amd64\` |" >> $GITHUB_STEP_SUMMARY - echo "| ${bin} | arm64 | \`${bin}-arm64\` |" >> $GITHUB_STEP_SUMMARY - done diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 6080cfa..dcf11cc 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -43,100 +43,6 @@ jobs: echo "version=${BASE}-alpha.${COUNT}" >> $GITHUB_OUTPUT fi - build-patroni: - name: Build patroni (${{ matrix.arch }}) - runs-on: ${{ matrix.runner }} - needs: version - strategy: - fail-fast: false - matrix: - arch: - - amd64 - - arm64 - include: - - arch: amd64 - runner: ubuntu-latest - platform: linux/amd64 - - arch: arm64 - runner: ubuntu-24.04-arm - platform: linux/arm64 - steps: - - uses: actions/checkout@v4 - - - name: Set image name - id: image - run: | - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-patroni" >> $GITHUB_OUTPUT - - - uses: docker/setup-buildx-action@v3 - - - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - context: deployments/docker/patroni - file: deployments/docker/patroni/Dockerfile - push: true - outputs: type=registry,name=${{ steps.image.outputs.name }},push-by-digest=true - platforms: ${{ matrix.platform }} - provenance: false - cache-from: type=gha,scope=pre-patroni-${{ matrix.arch }} - cache-to: type=gha,mode=max,scope=pre-patroni-${{ matrix.arch }} - - - name: Save digest - run: | - mkdir -p /tmp/digests - echo "${{ steps.build.outputs.digest }}" > /tmp/digests/patroni-${{ matrix.arch }}.txt - - - uses: actions/upload-artifact@v4 - with: - name: digest-patroni-${{ matrix.arch }} - path: /tmp/digests/patroni-${{ matrix.arch }}.txt - retention-days: 1 - - manifest-patroni: - name: Manifest patroni - runs-on: ubuntu-latest - needs: [version, build-patroni] - steps: - - name: Set image name - id: image - run: | - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csfx-ce-patroni" >> $GITHUB_OUTPUT - - - uses: actions/download-artifact@v4 - with: - pattern: digest-patroni-* - path: /tmp/digests - merge-multiple: true - - - uses: docker/setup-buildx-action@v3 - - - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Create and push manifest - run: | - VERSION="${{ needs.version.outputs.version }}" - IMAGE="${{ steps.image.outputs.name }}" - AMD64=$(tr -d '[:space:]' < /tmp/digests/patroni-amd64.txt) - ARM64=$(tr -d '[:space:]' < /tmp/digests/patroni-arm64.txt) - docker buildx imagetools create \ - -t ${IMAGE}:${VERSION} \ - ${IMAGE}@${AMD64} \ - ${IMAGE}@${ARM64} - build-binaries: name: Build ${{ matrix.binary }} (${{ matrix.arch }}) runs-on: ${{ matrix.runner }} @@ -273,7 +179,7 @@ jobs: github-release: name: GitHub Pre-release runs-on: ubuntu-latest - needs: [version, manifest-patroni, build-binaries, build-cp-binaries] + needs: [version, build-binaries, build-cp-binaries] steps: - uses: actions/checkout@v4 @@ -311,7 +217,7 @@ jobs: update-infra: name: Update CSFX-Infra versions.nix runs-on: ubuntu-latest - needs: [version, manifest-patroni, build-binaries, build-cp-binaries, github-release] + needs: [version, build-binaries, build-cp-binaries, github-release] steps: - uses: actions/checkout@v4 with: @@ -332,36 +238,12 @@ jobs: path: /tmp/cp-binaries merge-multiple: true - - uses: actions/download-artifact@v4 - with: - pattern: digest-patroni-* - path: /tmp/digests - merge-multiple: true - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Write versions.nix run: | VERSION="${{ needs.version.outputs.version }}" - ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') REPO="${{ github.repository }}" RELEASE_BASE="https://github.com/${REPO}/releases/download/v${VERSION}" - get_manifest_digest() { - local svc=$1 - local image="ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}" - docker buildx imagetools inspect "${image}" \ - --format '{{json .Manifest}}' | jq -r '.digest' - } - get_sha256() { local file=$1 awk '{print $1}' "/tmp/binaries/${file}.sha256" 2>/dev/null @@ -376,9 +258,6 @@ jobs: { csfx = { version = "${VERSION}"; - images = { - patroni = { digest = "$(get_manifest_digest patroni)"; }; - }; agent = { amd64 = { url = "${RELEASE_BASE}/csfx-agent-amd64"; diff --git a/control-plane/api-gateway/src/main.rs b/control-plane/api-gateway/src/main.rs index 25530fd..2282a4c 100644 --- a/control-plane/api-gateway/src/main.rs +++ b/control-plane/api-gateway/src/main.rs @@ -161,7 +161,12 @@ async fn main() { .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi())) .with_state(state); - let addr = SocketAddr::from(([0, 0, 0, 0], 8000)); + let port = std::env::var("GATEWAY_PORT") + .ok() + .and_then(|p| p.parse::().ok()) + .unwrap_or(8000); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "0.0.0.0".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); tracing::info!(version = env!("CARGO_PKG_VERSION"), addr = %addr, "listening"); let listener = tokio::net::TcpListener::bind(addr).await.unwrap(); axum::serve( diff --git a/control-plane/failover-controller/src/main.rs b/control-plane/failover-controller/src/main.rs index 926e5ed..31433c7 100644 --- a/control-plane/failover-controller/src/main.rs +++ b/control-plane/failover-controller/src/main.rs @@ -33,7 +33,8 @@ async fn main() -> anyhow::Result<()> { .and_then(|p| p.parse::().ok()) .unwrap_or(8004); - let addr = SocketAddr::from(([0, 0, 0, 0], port)); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "127.0.0.1".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); log_info!( "main", &format!("Failover Controller listening port={}", port) diff --git a/control-plane/registry/src/main.rs b/control-plane/registry/src/main.rs index 86b353c..0ed4b33 100644 --- a/control-plane/registry/src/main.rs +++ b/control-plane/registry/src/main.rs @@ -100,7 +100,8 @@ async fn main() -> anyhow::Result<()> { .and_then(|p| p.parse::().ok()) .unwrap_or(8001); - let addr = SocketAddr::from(([0, 0, 0, 0], port)); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "127.0.0.1".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); log_info!("main", &format!("Registry listening port={}", port)); diff --git a/control-plane/scheduler/src/main.rs b/control-plane/scheduler/src/main.rs index dab7fdf..743d285 100644 --- a/control-plane/scheduler/src/main.rs +++ b/control-plane/scheduler/src/main.rs @@ -58,7 +58,8 @@ async fn main() -> anyhow::Result<()> { .and_then(|p| p.parse::().ok()) .unwrap_or(8002); - let addr = SocketAddr::from(([0, 0, 0, 0], port)); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "127.0.0.1".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); log_info!("main", &format!("Scheduler listening port={}", port)); let listener = tokio::net::TcpListener::bind(addr).await?; diff --git a/control-plane/sdn-controller/src/main.rs b/control-plane/sdn-controller/src/main.rs index 47fa396..6aad76f 100644 --- a/control-plane/sdn-controller/src/main.rs +++ b/control-plane/sdn-controller/src/main.rs @@ -41,7 +41,8 @@ async fn main() -> anyhow::Result<()> { .and_then(|p| p.parse::().ok()) .unwrap_or(8005); - let addr = SocketAddr::from(([0, 0, 0, 0], port)); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "127.0.0.1".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); log_info!("main", &format!("SDN Controller listening port={}", port)); let listener = tokio::net::TcpListener::bind(addr).await?; diff --git a/control-plane/volume-manager/src/main.rs b/control-plane/volume-manager/src/main.rs index a411c5d..6da6bd5 100644 --- a/control-plane/volume-manager/src/main.rs +++ b/control-plane/volume-manager/src/main.rs @@ -75,7 +75,8 @@ async fn main() -> anyhow::Result<()> { .and_then(|p| p.parse::().ok()) .unwrap_or(8003); - let addr = SocketAddr::from(([0, 0, 0, 0], port)); + let listen_addr = std::env::var("LISTEN_ADDR").unwrap_or_else(|_| "127.0.0.1".to_string()); + let addr: SocketAddr = format!("{}:{}", listen_addr, port).parse().unwrap(); log_info!("main", &format!("Volume Manager listening port={}", port)); let listener = tokio::net::TcpListener::bind(addr).await?; From 013b2c899925240cbc667b88052d27bb2c536e7c Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sat, 9 May 2026 18:30:57 +0200 Subject: [PATCH 06/15] fix: agent bootstrap error --- agent/src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agent/src/main.rs b/agent/src/main.rs index 66ecc86..0324388 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -104,9 +104,9 @@ async fn perform_registration( heartbeat_interval_secs: u64, agent_pki: &pki::AgentPki, ) -> Result<(uuid::Uuid, String)> { - let token = match std::env::var("CSFX_REGISTRATION_TOKEN") { - Ok(t) => t, - Err(_) => { + let token = match std::env::var("CSFX_REGISTRATION_TOKEN").ok().filter(|t| !t.is_empty()) { + Some(t) => t, + None => { info!("CSFX_REGISTRATION_TOKEN not set, fetching bootstrap token from gateway"); client .fetch_bootstrap_token() From 98a935a7f89bb901b5a3314854016e60a70c9950 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sat, 9 May 2026 20:01:04 +0200 Subject: [PATCH 07/15] fix: make bootstrap registration idempotent by upserting on hostname --- control-plane/registry/src/db/agents.rs | 43 +++++++ control-plane/registry/src/handlers/agent.rs | 117 ++++++++++-------- .../registry/src/services/api_keys.rs | 4 + .../registry/src/services/registry.rs | 60 +++++++-- 4 files changed, 164 insertions(+), 60 deletions(-) diff --git a/control-plane/registry/src/db/agents.rs b/control-plane/registry/src/db/agents.rs index 14bbccc..f9b8dd4 100644 --- a/control-plane/registry/src/db/agents.rs +++ b/control-plane/registry/src/db/agents.rs @@ -5,6 +5,49 @@ use sea_orm::{ }; use uuid::Uuid; +pub async fn get_by_hostname( + db: &DatabaseConnection, + hostname: &str, +) -> Result> { + Ok(agents::Entity::find() + .filter(agents::Column::Hostname.eq(hostname)) + .one(db) + .await?) +} + +pub async fn update_registration( + db: &DatabaseConnection, + agent_id: Uuid, + agent_version: String, + os_type: String, + os_version: String, + architecture: String, + tags: Option, + public_key_pem: Option, +) -> Result { + let mut agent: agents::ActiveModel = agents::Entity::find_by_id(agent_id) + .one(db) + .await? + .ok_or_else(|| anyhow::anyhow!("Agent not found"))? + .into(); + + agent.agent_version = Set(agent_version); + agent.os_type = Set(os_type); + agent.os_version = Set(os_version); + agent.architecture = Set(architecture); + agent.status = Set("Online".to_string()); + agent.last_heartbeat = Set(Some(chrono::Utc::now().naive_utc())); + agent.updated_at = Set(Some(chrono::Utc::now().naive_utc())); + if tags.is_some() { + agent.tags = Set(tags); + } + if public_key_pem.is_some() { + agent.public_key_pem = Set(public_key_pem); + } + + Ok(agent.update(db).await?) +} + pub async fn create( db: &DatabaseConnection, id: Uuid, diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index 23a8378..a42084f 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -15,69 +15,70 @@ pub async fn register_agent( State(state): State, Json(request): Json, ) -> Result, (StatusCode, Json)> { - let agent_id = if crate::services::bootstrap_tokens::BootstrapTokenManager::is_bootstrap_token( - &request.registration_token, - ) { - if let Err(e) = state - .bootstrap_token_manager - .validate_and_use(&request.registration_token) - .await - { - return Err(( - StatusCode::UNAUTHORIZED, - Json(ErrorResponse { - error: format!("Invalid bootstrap token: {}", e), - }), - )); - } - uuid::Uuid::new_v4() - } else { - let token_data = match state - .token_manager - .validate_and_consume_token(&request.registration_token) - .await - { - Ok(token) => token, - Err(e) => { + let (agent_id, allow_reregister) = + if crate::services::bootstrap_tokens::BootstrapTokenManager::is_bootstrap_token( + &request.registration_token, + ) { + if let Err(e) = state + .bootstrap_token_manager + .validate_and_use(&request.registration_token) + .await + { return Err(( StatusCode::UNAUTHORIZED, Json(ErrorResponse { - error: format!("Invalid registration token: {}", e), + error: format!("Invalid bootstrap token: {}", e), }), - )) + )); } - }; + (uuid::Uuid::new_v4(), true) + } else { + let token_data = match state + .token_manager + .validate_and_consume_token(&request.registration_token) + .await + { + Ok(token) => token, + Err(e) => { + return Err(( + StatusCode::UNAUTHORIZED, + Json(ErrorResponse { + error: format!("Invalid registration token: {}", e), + }), + )) + } + }; - if token_data.expected_name != request.name { - return Err(( - StatusCode::FORBIDDEN, - Json(ErrorResponse { - error: format!( - "Agent name mismatch. Expected '{}', got '{}'", - token_data.expected_name, request.name - ), - }), - )); - } + if token_data.expected_name != request.name { + return Err(( + StatusCode::FORBIDDEN, + Json(ErrorResponse { + error: format!( + "Agent name mismatch. Expected '{}', got '{}'", + token_data.expected_name, request.name + ), + }), + )); + } - if token_data.expected_hostname != request.hostname { - return Err(( - StatusCode::FORBIDDEN, - Json(ErrorResponse { - error: format!( - "Agent hostname mismatch. Expected '{}', got '{}'", - token_data.expected_hostname, request.hostname - ), - }), - )); - } + if token_data.expected_hostname != request.hostname { + return Err(( + StatusCode::FORBIDDEN, + Json(ErrorResponse { + error: format!( + "Agent hostname mismatch. Expected '{}', got '{}'", + token_data.expected_hostname, request.hostname + ), + }), + )); + } - token_data.agent_id - }; + (token_data.agent_id, false) + }; let csr_pem = request.csr_pem.clone(); - let agent = match state + let (agent, reregistered) = match state .agent_registry .register_agent(RegisterAgentParams { agent_id, @@ -88,10 +89,11 @@ pub async fn register_agent( architecture: request.architecture, agent_version: request.agent_version, tags: request.tags, + allow_reregister, }) .await { - Ok(agent) => agent, + Ok(result) => result, Err(e) => { return Err(( StatusCode::INTERNAL_SERVER_ERROR, @@ -102,6 +104,15 @@ pub async fn register_agent( } }; + if reregistered { + if let Err(e) = state.api_key_manager.revoke_all_keys(agent.id).await { + crate::log_warn!( + "agent_handler", + &format!("Failed to revoke old API keys for agent={}: {}", agent.id, e) + ); + } + } + let api_key = state.api_key_manager.create_key(agent.id).await; let (certificate_pem, ca_cert_pem) = if let Some(ref csr) = csr_pem { diff --git a/control-plane/registry/src/services/api_keys.rs b/control-plane/registry/src/services/api_keys.rs index b1001b2..d1bf6ec 100644 --- a/control-plane/registry/src/services/api_keys.rs +++ b/control-plane/registry/src/services/api_keys.rs @@ -77,6 +77,10 @@ impl ApiKeyManager { } } + pub async fn revoke_all_keys(&self, agent_id: Uuid) -> Result<(), String> { + self.revoke_key(agent_id).await + } + pub async fn revoke_key(&self, agent_id: Uuid) -> Result<(), String> { match crate::db::api_keys::revoke_by_agent(&self.db, agent_id).await { Ok(revoked) => { diff --git a/control-plane/registry/src/services/registry.rs b/control-plane/registry/src/services/registry.rs index 7d273f0..9b12bdc 100644 --- a/control-plane/registry/src/services/registry.rs +++ b/control-plane/registry/src/services/registry.rs @@ -26,6 +26,7 @@ pub struct RegisterAgentParams { pub architecture: String, pub agent_version: String, pub tags: Option>, + pub allow_reregister: bool, } pub struct AgentRegistry { @@ -112,12 +113,57 @@ impl AgentRegistry { pub async fn register_agent( &self, params: RegisterAgentParams, - ) -> Result { + ) -> Result<(RegisteredAgent, bool), String> { let tags_json = params .tags .as_ref() .and_then(|t| serde_json::to_value(t).ok()); + if params.allow_reregister { + if let Some(existing) = crate::db::agents::get_by_hostname(&self.db, ¶ms.hostname) + .await + .map_err(|e| format!("Failed to query existing agent: {}", e))? + { + let db_agent = crate::db::agents::update_registration( + &self.db, + existing.id, + params.agent_version.clone(), + params.os_type.clone(), + params.os_version.clone(), + params.architecture.clone(), + tags_json, + None, + ) + .await + .map_err(|e| format!("Failed to update existing agent: {}", e))?; + + crate::log_info!( + "agent_registry", + &format!( + "Re-registered existing agent: {} id={}", + params.name, db_agent.id + ) + ); + + let agent = RegisteredAgent { + id: db_agent.id, + name: db_agent.name, + hostname: db_agent.hostname, + ip_address: db_agent.ip_address, + os_type: db_agent.os_type, + os_version: db_agent.os_version, + architecture: db_agent.architecture, + agent_version: db_agent.agent_version, + status: AgentStatus::Online, + registered_at: db_agent.registered_at.and_utc(), + last_heartbeat: db_agent.last_heartbeat.map(|dt: NaiveDateTime| dt.and_utc()), + tags: params.tags, + }; + + return Ok((agent, true)); + } + } + let db_agent = crate::db::agents::create( &self.db, params.agent_id, @@ -136,6 +182,11 @@ impl AgentRegistry { .await .map_err(|e| format!("Failed to create agent in database: {}", e))?; + crate::log_info!( + "agent_registry", + &format!("Registered new agent: {} id={}", params.name, db_agent.id) + ); + let agent = RegisteredAgent { id: db_agent.id, name: db_agent.name.clone(), @@ -151,12 +202,7 @@ impl AgentRegistry { tags: params.tags, }; - crate::log_info!( - "agent_registry", - &format!("Registered new agent: {} id={}", params.name, agent.id) - ); - - Ok(agent) + Ok((agent, false)) } pub async fn update_heartbeat(&self, agent_id: Uuid) -> Result<(), String> { From 5bd062e4c5941b2c49ce23c76b4f6a92895eb466 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 15:28:02 +0200 Subject: [PATCH 08/15] feat(cp): wire csfx-updater as systemd service and rename units --- deployments/systemd/csfx-updater.service | 24 ---- deployments/systemd/csfx-updater.sh | 166 ----------------------- deployments/systemd/install.sh | 53 -------- 3 files changed, 243 deletions(-) delete mode 100644 deployments/systemd/csfx-updater.service delete mode 100755 deployments/systemd/csfx-updater.sh delete mode 100755 deployments/systemd/install.sh diff --git a/deployments/systemd/csfx-updater.service b/deployments/systemd/csfx-updater.service deleted file mode 100644 index 544f4f0..0000000 --- a/deployments/systemd/csfx-updater.service +++ /dev/null @@ -1,24 +0,0 @@ -[Unit] -Description=CSFX Control Plane Updater -After=network.target docker.service -Requires=docker.service - -[Service] -Type=simple -User=csfx-updater -Group=docker -EnvironmentFile=/opt/csfx/.env -Environment=ETCD_ENDPOINT=http://localhost:2379 -Environment=ETCD_USERNAME=csf -Environment=GHCR_ORG=csfx-cloud -Environment=COMPOSE_FILE=/opt/csfx/docker-compose.prod.yml -Environment=POLL_INTERVAL=30 -ExecStart=/opt/csfx/csfx-updater.sh -Restart=always -RestartSec=10 -StandardOutput=journal -StandardError=journal -SyslogIdentifier=csfx-updater - -[Install] -WantedBy=multi-user.target diff --git a/deployments/systemd/csfx-updater.sh b/deployments/systemd/csfx-updater.sh deleted file mode 100755 index c0fb533..0000000 --- a/deployments/systemd/csfx-updater.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -ETCD_ENDPOINT="${ETCD_ENDPOINT:-http://localhost:2379}" -ETCD_USERNAME="${ETCD_USERNAME:-csfx}" -ETCD_PASSWORD="${ETCD_PASSWORD:?ETCD_PASSWORD must be set}" -COMPOSE_FILE="${COMPOSE_FILE:-/opt/csfxx/docker-compose.prod.yml}" -GHCR_ORG="${GHCR_ORG:-csfx-cloud}" -POLL_INTERVAL="${POLL_INTERVAL:-30}" - -GHCR_TOKEN="${GHCR_TOKEN:?GHCR_TOKEN must be set}" -ETCD_DESIRED_KEY="/csfx/config/desired_cp_version" -ETCD_RESULT_KEY="/csfx/config/last_update_result" - -SERVICES=(api-gateway registry scheduler volume-manager failover-controller sdn-controller) - -log() { - echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] $*" -} - -etcd_auth_token() { - curl -sf "${ETCD_ENDPOINT}/v3/auth/authenticate" \ - -X POST \ - -H "Content-Type: application/json" \ - -d "{\"name\": \"${ETCD_USERNAME}\", \"password\": \"${ETCD_PASSWORD}\"}" \ - | jq -r '.token // empty' -} - -etcd_get() { - local token - token="$(etcd_auth_token)" - curl -sf "${ETCD_ENDPOINT}/v3/kv/range" \ - -X POST \ - -H "Content-Type: application/json" \ - -H "Authorization: ${token}" \ - -d "{\"key\": \"$(printf '%s' "$1" | base64 -w0)\"}" \ - | jq -r '.kvs[0].value // empty' \ - | base64 -d 2>/dev/null || true -} - -etcd_put() { - local token - token="$(etcd_auth_token)" - curl -sf "${ETCD_ENDPOINT}/v3/kv/put" \ - -X POST \ - -H "Content-Type: application/json" \ - -H "Authorization: ${token}" \ - -d "{\"key\": \"$(printf '%s' "$1" | base64 -w0)\", \"value\": \"$(printf '%s' "$2" | base64 -w0)\"}" \ - > /dev/null -} - -current_version() { - docker compose -f "$COMPOSE_FILE" \ - --env-file "$(dirname "$COMPOSE_FILE")/.env" \ - images --format json 2>/dev/null \ - | jq -r '.[0].Tag // empty' \ - | head -1 || true -} - -ghcr_digest() { - local image="$1" tag="$2" - curl -sf \ - -H "Authorization: Bearer ${GHCR_TOKEN}" \ - -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \ - "https://ghcr.io/v2/${image}/manifests/${tag}" \ - -I | grep -i "^docker-content-digest:" | tr -d '[:space:]' | cut -d: -f2- -} - -local_digest() { - docker inspect --format='{{index .RepoDigests 0}}' "$1" 2>/dev/null \ - | cut -d@ -f2 || true -} - -verify_images() { - local version="$1" - log "verifying image digests against GHCR" - for svc in "${SERVICES[@]}"; do - local image="ghcr.io/${GHCR_ORG}/csfx-ce-${svc}" - local remote_digest local_dig - remote_digest="$(ghcr_digest "${GHCR_ORG}/csfx-ce-${svc}" "${version}")" - local_dig="$(local_digest "${image}:${version}")" - - if [[ -z "$remote_digest" ]]; then - log "failed to fetch remote digest for ${svc}" - return 1 - fi - if [[ "$remote_digest" != "$local_dig" ]]; then - log "digest mismatch for ${svc}: remote=${remote_digest} local=${local_dig}" - return 1 - fi - log "verified ${svc}: ${remote_digest}" - done -} - -run_update() { - local version="$1" - log "starting update to ${version}" - - etcd_put "$ETCD_RESULT_KEY" "in_progress" - - log "pulling images" - if ! GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ - docker compose -f "$COMPOSE_FILE" pull; then - log "pull failed" - etcd_put "$ETCD_RESULT_KEY" "failed" - return 1 - fi - - if ! verify_images "$version"; then - log "image verification failed, aborting update" - etcd_put "$ETCD_RESULT_KEY" "failed" - return 1 - fi - - log "restarting services" - if ! GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ - docker compose -f "$COMPOSE_FILE" up -d; then - log "up failed" - etcd_put "$ETCD_RESULT_KEY" "failed" - return 1 - fi - - log "waiting for health checks" - sleep 15 - if ! GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ - docker compose -f "$COMPOSE_FILE" ps --format json \ - | jq -e '[.[] | select(.Health == "unhealthy")] | length == 0' > /dev/null 2>&1; then - log "health check failed" - etcd_put "$ETCD_RESULT_KEY" "failed" - return 1 - fi - - etcd_put "$ETCD_RESULT_KEY" "success" - log "update to ${version} complete" -} - -is_valid_version() { - [[ "$1" =~ ^v?[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9._-]+)?$ ]] -} - -log "csfx-updater started, polling etcd every ${POLL_INTERVAL}s" - -last_applied="" - -while true; do - desired="$(etcd_get "$ETCD_DESIRED_KEY")" - - if [[ -n "$desired" && "$desired" != "$last_applied" ]]; then - if ! is_valid_version "$desired"; then - log "rejected invalid version string: ${desired}" - etcd_put "$ETCD_RESULT_KEY" "failed" - last_applied="$desired" - sleep "$POLL_INTERVAL" - continue - fi - - log "desired version: ${desired}, last applied: ${last_applied:-none}" - if run_update "$desired"; then - last_applied="$desired" - else - last_applied="$desired" - fi - fi - - sleep "$POLL_INTERVAL" -done diff --git a/deployments/systemd/install.sh b/deployments/systemd/install.sh deleted file mode 100755 index 57c447e..0000000 --- a/deployments/systemd/install.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -CSFX_DIR="/opt/csfxx" - -if [[ "$EUID" -ne 0 ]]; then - echo "run as root" - exit 1 -fi - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -if ! id csfx-updater &>/dev/null; then - useradd --system --no-create-home --shell /usr/sbin/nologin csfx-updater - usermod -aG docker csfx-updater - echo "created csfx-updater system user" -fi - -mkdir -p "$CSFX_DIR" -chown csfx-updater:docker "$CSFX_DIR" - -cp "${REPO_ROOT}/docker-compose.prod.yml" "${CSFX_DIR}/docker-compose.prod.yml" -cp "${SCRIPT_DIR}/csfx-updater.sh" "${CSFX_DIR}/csfx-updater.sh" -chmod 750 "${CSFX_DIR}/csfx-updater.sh" -chown csfx-updater:docker "${CSFX_DIR}/csfx-updater.sh" - -if [[ ! -f "${CSFX_DIR}/.env" ]]; then - cp "${REPO_ROOT}/.env.example" "${CSFX_DIR}/.env" - chmod 640 "${CSFX_DIR}/.env" - chown csfx-updater:docker "${CSFX_DIR}/.env" - echo "created ${CSFX_DIR}/.env — fill in values before starting" -fi - -cp "${SCRIPT_DIR}/csfx-updater.service" /etc/systemd/system/csfx-updater.service - -if command -v ufw &>/dev/null; then - ufw deny in 2379/tcp comment "etcd - internal only" - ufw deny in 2380/tcp comment "etcd peer - internal only" - echo "ufw rules added: etcd ports 2379/2380 blocked from external access" -elif command -v firewall-cmd &>/dev/null; then - firewall-cmd --permanent --add-rich-rule='rule port port="2379" protocol="tcp" reject' - firewall-cmd --permanent --add-rich-rule='rule port port="2380" protocol="tcp" reject' - firewall-cmd --reload - echo "firewalld rules added: etcd ports 2379/2380 blocked from external access" -fi - -systemctl daemon-reload -systemctl enable csfx-updater -systemctl start csfx-updater - -echo "csfx-updater installed and started" -echo "logs: journalctl -u csfx-updater -f" From ddbfc81032daae687355fd3fba90b7ea7662a820 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 15:50:24 +0200 Subject: [PATCH 09/15] fix: pipeline build error --- .github/workflows/docker-build.yml | 10 ++++++++++ .github/workflows/prerelease.yml | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 531e181..f22d6d6 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -343,6 +343,16 @@ jobs: sha256 = "$(get_cp_sha256 csfx-cp-sdn-controller-arm64)"; }; }; + csfx-updater = { + amd64 = { + url = "${RELEASE_BASE}/csfx-updater-amd64"; + sha256 = "$(get_sha256 csfx-updater-amd64)"; + }; + arm64 = { + url = "${RELEASE_BASE}/csfx-updater-arm64"; + sha256 = "$(get_sha256 csfx-updater-arm64)"; + }; + }; }; }; } diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index dcf11cc..46874d9 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -339,6 +339,16 @@ jobs: sha256 = "$(get_cp_sha256 csfx-cp-sdn-controller-arm64)"; }; }; + csfx-updater = { + amd64 = { + url = "${RELEASE_BASE}/csfx-updater-amd64"; + sha256 = "$(get_sha256 csfx-updater-amd64)"; + }; + arm64 = { + url = "${RELEASE_BASE}/csfx-updater-arm64"; + sha256 = "$(get_sha256 csfx-updater-arm64)"; + }; + }; }; }; } From 839fd2836017f3829bc997cea6e8adfe71934719 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 16:04:36 +0200 Subject: [PATCH 10/15] fix: build error with updater hash --- .github/workflows/docker-build.yml | 6 ++++++ .github/workflows/prerelease.yml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index f22d6d6..070ec9c 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -236,6 +236,12 @@ jobs: path: /tmp/binaries merge-multiple: true + - uses: actions/download-artifact@v4 + with: + pattern: csfx-updater-* + path: /tmp/binaries + merge-multiple: true + - uses: actions/download-artifact@v4 with: pattern: csfx-cp-* diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 46874d9..2035870 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -232,6 +232,12 @@ jobs: path: /tmp/binaries merge-multiple: true + - uses: actions/download-artifact@v4 + with: + pattern: csfx-updater-* + path: /tmp/binaries + merge-multiple: true + - uses: actions/download-artifact@v4 with: pattern: csfx-cp-* From 0ee18965f068a7e526fb64b87bdf3ffb75dcde4a Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 17:48:01 +0200 Subject: [PATCH 11/15] fix(agent): rename state dir from csfx-daemon to csfx-agent --- agent/src/config.rs | 6 +++--- agent/src/pki.rs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/agent/src/config.rs b/agent/src/config.rs index 3a99fd8..82c6d1f 100644 --- a/agent/src/config.rs +++ b/agent/src/config.rs @@ -3,9 +3,9 @@ use serde::{Deserialize, Serialize}; use std::path::Path; use uuid::Uuid; -const STATE_DIR: &str = "/var/lib/csfx-daemon"; -const CREDENTIALS_FILE: &str = "/var/lib/csfx-daemon/credentials"; -const CONFIG_FILE: &str = "/var/lib/csfx-daemon/config.json"; +const STATE_DIR: &str = "/var/lib/csfx-agent"; +const CREDENTIALS_FILE: &str = "/var/lib/csfx-agent/credentials"; +const CONFIG_FILE: &str = "/var/lib/csfx-agent/config.json"; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DaemonConfig { diff --git a/agent/src/pki.rs b/agent/src/pki.rs index 4b103dc..95ea1fe 100644 --- a/agent/src/pki.rs +++ b/agent/src/pki.rs @@ -2,10 +2,10 @@ use anyhow::{Context, Result}; use rcgen::{CertificateParams, DnType, KeyPair, PKCS_ECDSA_P256_SHA256}; use std::path::Path; -const KEY_FILE: &str = "/var/lib/csfx-daemon/agent.key"; -const CSR_FILE: &str = "/var/lib/csfx-daemon/agent.csr"; -const CERT_FILE: &str = "/var/lib/csfx-daemon/agent.crt"; -const CA_FILE: &str = "/var/lib/csfx-daemon/ca.crt"; +const KEY_FILE: &str = "/var/lib/csfx-agent/agent.key"; +const CSR_FILE: &str = "/var/lib/csfx-agent/agent.csr"; +const CERT_FILE: &str = "/var/lib/csfx-agent/agent.crt"; +const CA_FILE: &str = "/var/lib/csfx-agent/ca.crt"; pub struct AgentPki { key_pem: String, From 2728d06c157b95e2d9124fc2e449c9411075c978 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 19:51:23 +0200 Subject: [PATCH 12/15] fix: added log for testing updater --- control-plane/registry/src/handlers/agent.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index a42084f..834144f 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -217,6 +217,13 @@ pub async fn heartbeat( let post_update_heartbeats = increment_post_update_heartbeats(&state.etcd_endpoints, agent_id).await; + tracing::info!( + agent_id = %agent_id, + desired_flake_rev = ?desired_flake_rev, + post_update_heartbeats = ?post_update_heartbeats, + "heartbeat processed" + ); + Ok(Json(HeartbeatResponse { success: true, message: "Heartbeat recorded".to_string(), From c9f30e96cc9f1a59bcbea6efc885597d1139cdbb Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 20:01:23 +0200 Subject: [PATCH 13/15] fix(gateway): exempt registry routes from rate limiting --- control-plane/api-gateway/src/routes/mod.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/control-plane/api-gateway/src/routes/mod.rs b/control-plane/api-gateway/src/routes/mod.rs index 2b70e23..4e19e8e 100644 --- a/control-plane/api-gateway/src/routes/mod.rs +++ b/control-plane/api-gateway/src/routes/mod.rs @@ -75,25 +75,27 @@ pub fn create_router() -> Router { .allow_headers(vec![AUTHORIZATION, ACCEPT, CONTENT_TYPE]) .allow_credentials(true); + let internal_api_router = Router::new() + .merge(registry::registry_routes()); + let api_router = Router::new() .merge(agents::agents_routes()) .merge(networks::networks_routes()) .merge(organizations::routes()) - .merge(registry::registry_routes()) .merge(system::routes()) .merge(update::routes()) .merge(users::users_routes()) .merge(volumes::volumes_routes()) .merge(workloads::workloads_routes()) - .merge(events::events_routes()); + .merge(events::events_routes()) + .layer(GovernorLayer { + config: governor_config, + }); Router::new() .route("/metrics", get(metrics::metrics_handler)) - // API routes .logged_nest("/api", api_router) - .layer(GovernorLayer { - config: governor_config, - }) + .logged_nest("/api", internal_api_router) .layer( TraceLayer::new_for_http() .make_span_with(|request: &Request| { From 8aaeef0ff44c77e752e04c9dcc5fc0c5a57264bb Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sun, 10 May 2026 20:25:52 +0200 Subject: [PATCH 14/15] fix: added log for testing update flow --- agent/src/main.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agent/src/main.rs b/agent/src/main.rs index 0324388..90ecf64 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -191,6 +191,12 @@ async fn run_heartbeat_loop( failure_count = 0; } + info!( + agent_id = %agent_id, + desired_flake_rev = ?resp.desired_flake_rev, + "heartbeat ok" + ); + if let Some(count) = resp.post_update_heartbeats { update_watch::write_heartbeat_counter(count).await; } From 87a3a86b1afeb8451e0a376a2f144cf92fddbc2f Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Thu, 14 May 2026 19:27:37 +0200 Subject: [PATCH 15/15] refactor: delete old nix config --- nixos-node/.gitignore | 26 -- nixos-node/DEPLOYMENT.md | 177 ----------- nixos-node/README.md | 131 -------- nixos-node/deploy.sh | 27 -- nixos-node/flake.lock | 27 -- nixos-node/flake.nix | 83 ----- nixos-node/logo.txt | 44 --- nixos-node/modules/iso-configuration.nix | 161 ---------- nixos-node/modules/server-configuration.nix | 329 -------------------- 9 files changed, 1005 deletions(-) delete mode 100644 nixos-node/.gitignore delete mode 100644 nixos-node/DEPLOYMENT.md delete mode 100644 nixos-node/README.md delete mode 100755 nixos-node/deploy.sh delete mode 100644 nixos-node/flake.lock delete mode 100644 nixos-node/flake.nix delete mode 100644 nixos-node/logo.txt delete mode 100644 nixos-node/modules/iso-configuration.nix delete mode 100644 nixos-node/modules/server-configuration.nix diff --git a/nixos-node/.gitignore b/nixos-node/.gitignore deleted file mode 100644 index 3988d69..0000000 --- a/nixos-node/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -# Nix build results -result -result-* - -# Flake lock file (uncommit if you want to pin versions) -# flake.lock - -# ISO images -*.iso - -# QEMU disk images -*.qcow2 - -# Nix temporary files -.nix-defexpr -.nix-profile - -# VM state -*.sock -*.pid - -# Build logs -build.log -nix-build.log - -keys/ diff --git a/nixos-node/DEPLOYMENT.md b/nixos-node/DEPLOYMENT.md deleted file mode 100644 index aec67db..0000000 --- a/nixos-node/DEPLOYMENT.md +++ /dev/null @@ -1,177 +0,0 @@ -# CSFX-Core NixOS Deployment Guide - -## Voraussetzungen - -1. **NixOS auf dem Zielserver installiert** (z.B. mit der ISO aus diesem Projekt) -2. **SSH-Zugriff als root** mit Key-basierter Authentifizierung -3. **Nix mit Flakes** auf deinem Mac installiert - -## Schritt 1: SSH-Key einrichten - -Falls noch nicht vorhanden, generiere einen SSH-Key: - -```bash -ssh-keygen -t ed25519 -C "csfx-deployment@mac" -``` - -Kopiere den Public Key auf den Server: - -```bash -ssh-copy-id root@dein-server -# oder: -cat ~/.ssh/id_ed25519.pub | ssh root@dein-server "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys" -``` - -## Schritt 2: Konfiguration anpassen - -Bearbeite [`modules/server-configuration.nix`](modules/server-configuration.nix): - -1. **SSH Public Key eintragen** (Zeile 51): - - ```nix - users.users.root = { - openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1... dein-key-hier" - ]; - }; - ``` - -2. **Netzwerk-Konfiguration anpassen** (Zeile 21-35): - - Interface-Name (`eth0` → dein Interface) - - IP-Adresse (falls statisch gewünscht) - - Gateway und DNS - -3. **Boot-Loader anpassen** (Zeile 8-13): - - BIOS: `device = "/dev/sda";` - - UEFI: Kommentare entfernen und anpassen - -## Schritt 3: Deployment - -### Remote-Deployment von deinem Mac: - -```bash -cd /Volumes/CedricExterne/Coding/CSFX-Core/nixos-node - -# Deployment auf den Server -nixos-rebuild switch --flake .#csfx-server --target-host root@dein-server --use-remote-sudo -``` - -**Optionen:** - -- `--flake .#csfx-server`: Verwendet die `csfx-server`-Konfiguration aus der flake.nix -- `--target-host root@dein-server`: Deployment-Ziel (ersetze `dein-server` mit IP oder Hostname) -- `--use-remote-sudo`: Verwendet sudo auf dem Remote-Server (falls benötigt) -- `--build-host localhost`: Build lokal auf dem Mac (statt auf dem Server) - -### Lokales Build + Deployment (schneller): - -```bash -# 1. Build lokal -nix build .#nixosConfigurations.csfx-server.config.system.build.toplevel - -# 2. Auf Server kopieren und aktivieren -nixos-rebuild switch --flake .#csfx-server --target-host root@dein-server --build-host localhost -``` - -## Schritt 4: Verifizierung - -Nach dem Deployment, teste die Installation: - -```bash -# SSH auf den Server -ssh root@dein-server - -# Test-Script ausführen -./test-docker.sh - -# Oder manuell testen -systemctl status docker -docker ps -curl http://localhost:8080 -``` - -Von deinem Mac aus: - -```bash -# Nginx testen (ersetze IP) -curl http://dein-server:8080 -curl http://dein-server:8080/health -``` - -## Schritt 5: Kontinuierliche Updates - -Nach Änderungen an der Konfiguration: - -```bash -# Änderungen committen (optional, aber empfohlen) -git add modules/server-configuration.nix -git commit -m "Update server config" - -# Deployment -nixos-rebuild switch --flake .#csfx-server --target-host root@dein-server -``` - -## Troubleshooting - -### SSH-Verbindung schlägt fehl - -```bash -# Test SSH-Verbindung -ssh -v root@dein-server - -# Prüfe authorized_keys auf dem Server -ssh root@dein-server "cat ~/.ssh/authorized_keys" -``` - -### Build schlägt fehl - -```bash -# Lokaler Test-Build -nix build .#nixosConfigurations.csfx-server.config.system.build.toplevel --show-trace -``` - -### Konfiguration validieren - -```bash -# Syntax-Check ohne Deployment -nixos-rebuild dry-build --flake .#csfx-server --target-host root@dein-server -``` - -### Rollback bei Problemen - -```bash -# Auf dem Server: Zur vorherigen Generation zurückkehren -ssh root@dein-server -nixos-rebuild switch --rollback -``` - -## Alternative: Lokales Deployment - -Falls du direkten Zugriff auf den Server hast: - -```bash -# 1. Repo auf den Server klonen -ssh root@dein-server -git clone https://github.com/CS-Foundry/CSFX-Core.git /etc/nixos/csfx-core - -# 2. Lokal auf dem Server bauen und aktivieren -cd /etc/nixos/csfx-core/nixos-node -nixos-rebuild switch --flake .#csfx-server -``` - -## Architektur-Wechsel - -Für ARM-Server (Raspberry Pi, etc.), ändere in `flake.nix`: - -```nix -nixosConfigurations.csfx-server = nixpkgs.lib.nixosSystem { - system = "aarch64-linux"; # statt "x86_64-linux" - modules = [ ./modules/server-configuration.nix ]; -}; -``` - -Dann deployment: - -```bash -nixos-rebuild switch --flake .#csfx-server --target-host root@raspberry-pi --build-host localhost -``` diff --git a/nixos-node/README.md b/nixos-node/README.md deleted file mode 100644 index 2c5e19e..0000000 --- a/nixos-node/README.md +++ /dev/null @@ -1,131 +0,0 @@ -# CSFX-Core Docker Test ISO - -Diese Konfiguration erstellt ein bootfähiges NixOS ISO-Image mit Docker und Docker Compose für einfache Container-Tests. - -## 🚀 ISO bauen - -### Mit Flakes (empfohlen) - -```bash -cd nixos-node/ -nix build .#nixosConfigurations.iso.config.system.build.isoImage -``` - -Das ISO-Image wird unter `./result/iso/` erstellt. - -## 📦 Was ist enthalten? - -- **Docker & Docker Compose** - Container-Management und Orchestrierung -- **Nginx Test Container** - Über docker-compose automatisch gestartet auf Port 8080 -- **Test-Script** - Automatische Tests für Docker-Funktionalität - -## 🔧 Konfiguration - -### Ports - -- `8080` - Nginx Test Container - -### Automatisch gestartete Services - -- Docker Daemon -- Docker Compose mit nginx Container - -## 🎯 Verwendung - -### 1. ISO auf USB-Stick schreiben - -```bash -sudo dd if=result/iso/*.iso of=/dev/sdX bs=4M status=progress -``` - -### 2. System booten - -Boote von dem USB-Stick. Das System startet automatisch als Root und Docker Compose startet den nginx Container. - -### 3. Container testen - -```bash -# Webseite öffnen -curl http://localhost:8080 - -# Container-Status prüfen -docker ps -a - -# Docker Compose Status -docker-compose ps -``` - -### 4. Vollständiger Test - -```bash -# Führe das bereitgestellte Test-Script aus -./test-docker.sh -``` - -## 🐳 Docker Tests - -### Container verwalten - -```bash -# Container stoppen -docker-compose down - -# Container neu starten -docker-compose up -d - -# Logs ansehen -docker-compose logs -``` - -### Eigene Container testen - -```bash -# Eigenes docker-compose.yml erstellen -cat > test-compose.yml < /dev/null 2>&1; then - echo "[ERROR] ssh connection failed target=$TARGET_HOST" - exit 1 -fi - -REMOTE_DIR="/tmp/csfx-nixos-deploy-$$" -ssh "$TARGET_HOST" "mkdir -p $REMOTE_DIR" -rsync -az --delete \ - --exclude='.git' \ - --exclude='*.qcow2' \ - --exclude='result' \ - ./ "$TARGET_HOST:$REMOTE_DIR/" - -ssh -t "$TARGET_HOST" "cd $REMOTE_DIR && sudo nixos-rebuild switch --flake .#$FLAKE_NAME" - -ssh "$TARGET_HOST" "rm -rf $REMOTE_DIR" - -echo "[INFO] deployment complete target=$TARGET_HOST" diff --git a/nixos-node/flake.lock b/nixos-node/flake.lock deleted file mode 100644 index fe08f56..0000000 --- a/nixos-node/flake.lock +++ /dev/null @@ -1,27 +0,0 @@ -{ - "nodes": { - "nixpkgs": { - "locked": { - "lastModified": 1751274312, - "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-24.11", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "nixpkgs": "nixpkgs" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/nixos-node/flake.nix b/nixos-node/flake.nix deleted file mode 100644 index 1b0f169..0000000 --- a/nixos-node/flake.nix +++ /dev/null @@ -1,83 +0,0 @@ -{ - description = "CSFX Node — binary builds and server configuration"; - - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; - rust-overlay = { - url = "github:oxalica/rust-overlay"; - inputs.nixpkgs.follows = "nixpkgs"; - }; - }; - - outputs = { self, nixpkgs, rust-overlay }: - let - system = "x86_64-linux"; - pkgs = import nixpkgs { - inherit system; - overlays = [ rust-overlay.overlays.default ]; - }; - - rustToolchain = pkgs.rust-bin.stable."1.88.0".default.override { - extensions = [ "rust-src" ]; - targets = [ "x86_64-unknown-linux-gnu" "x86_64-unknown-linux-musl" ]; - }; - - platform = pkgs.makeRustPlatform { - cargo = rustToolchain; - rustc = rustToolchain; - }; - - csfAgentPkg = platform.buildRustPackage { - pname = "csfx-agent"; - version = "0.2.2"; - src = ../.; - cargoLock.lockFile = ../Cargo.lock; - buildAndTestSubdir = "agent"; - nativeBuildInputs = [ pkgs.pkg-config ]; - buildInputs = [ pkgs.openssl ]; - }; - - csfUpdaterPkg = platform.buildRustPackage { - pname = "csfx-updater"; - version = "0.2.2"; - src = ../.; - cargoLock.lockFile = ../Cargo.lock; - buildAndTestSubdir = "control-plane/csfx-updater"; - nativeBuildInputs = [ pkgs.pkg-config pkgs.protobuf ]; - buildInputs = []; - doCheck = false; - }; - - versions = import ../CSFX-Infra/versions.nix; - - serverSpecialArgs = { - csfx.agentPackage = csfAgentPkg; - csfx.updaterPackage = csfUpdaterPkg; - inherit versions; - }; - in - { - nixosConfigurations.csfx-server = nixpkgs.lib.nixosSystem { - inherit system; - specialArgs = serverSpecialArgs; - modules = [ ./modules/server-configuration.nix ]; - }; - - nixosConfigurations.csfx-iso = nixpkgs.lib.nixosSystem { - inherit system; - specialArgs = serverSpecialArgs; - modules = [ ./modules/iso-configuration.nix ]; - }; - - packages.${system} = { - csfx-agent = csfAgentPkg; - csfx-updater = csfUpdaterPkg; - default = csfAgentPkg; - iso = nixpkgs.lib.nixosSystem { - inherit system; - specialArgs = serverSpecialArgs; - modules = [ ./modules/iso-configuration.nix ]; - }.config.system.build.isoImage; - }; - }; -} diff --git a/nixos-node/logo.txt b/nixos-node/logo.txt deleted file mode 100644 index eeaf58f..0000000 --- a/nixos-node/logo.txt +++ /dev/null @@ -1,44 +0,0 @@ - - - - ..,,,,,,,,,,,,,,,,,,,,,,,;,,,,,,,,,,,,,,'.. . - ..ckXXNNNNNNNNNNNNNNNNNNNNNNNNNNXXXXXXXXKx;. - ..cONWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWOc.. - ..ckNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. . .. - ..ckNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. . - ..ckXWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. - . .;kXWMMMMMMMMWKOkkkkkkkkkkkkkkkkkkkkkkkkxc.. . - .oNWMMMMMMMMNx,.......................... - .oNMMMMMMMMMK:. - .oNMMMMMMMMMK; ......................... - .oNMMMMMMMMMK; .'lddddddddddddddddddddddc'. . - .oNMMMMMMMMMK; .'o0NWWWWWWWWWWWWWWWWWWWWKd,. - .oNMMMMMMMMMK; .'l0NWMMMMMMMMMMMMMMMMMMWXx,. - .oNMMMMMMMMMK; .'l0NWMMMMMMMMMMMMMMMMMMWXx;. - .oNWMMMMMMMMK:'l0NWMMMMMMMMMMMMMMMMMMWXx;. - .cKWMMMMMMMMXOOXWMMMMWWWWWWWWWWWWWWWXx;. - .,dKWMMMMMMXo;lKMMMNOl:;;;;;;;;;;;;,. - .,dKWMMMM0; 'kWMMMNOl'. - .,dKWMM0; 'kWMMMMMN0o,... - .,dKW0; 'kWMMMMMMMWKkc. - .,ox, 'kWMMMMMMMMNXo. - ... 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMNkl;. - 'kWMMMMMNk:... - 'kWMMMNk:. - 'kWWXx;. - . 'kKx;. - .;,. - .. - - - - - - - diff --git a/nixos-node/modules/iso-configuration.nix b/nixos-node/modules/iso-configuration.nix deleted file mode 100644 index f9b8e8c..0000000 --- a/nixos-node/modules/iso-configuration.nix +++ /dev/null @@ -1,161 +0,0 @@ -{ config, pkgs, lib, csfx, versions, ... }: - -let - updateUnitsModule = import ../../../CSFX-Infra/modules/update-units.nix; - - installScript = pkgs.writeShellScript "csfx-install" '' - set -euo pipefail - - DISK="" - - for dev in sda vda nvme0n1; do - if [ -b "/dev/$dev" ]; then - DISK="/dev/$dev" - break - fi - done - - if [ -z "$DISK" ]; then - echo "[csfx-install] ERROR: no suitable disk found" >&2 - exit 1 - fi - - echo "[csfx-install] target disk: $DISK" - - if [[ "$DISK" == *nvme* ]]; then - PART_BOOT="${DISK}p1" - PART_ROOT="${DISK}p2" - else - PART_BOOT="${DISK}1" - PART_ROOT="${DISK}2" - fi - - parted "$DISK" -- mklabel gpt - parted "$DISK" -- mkpart ESP fat32 1MB 512MB - parted "$DISK" -- mkpart primary ext4 512MB 100% - parted "$DISK" -- set 1 esp on - - mkfs.fat -F 32 -n boot "$PART_BOOT" - mkfs.ext4 -L nixos "$PART_ROOT" - - mount "$PART_ROOT" /mnt - mkdir -p /mnt/boot - mount "$PART_BOOT" /mnt/boot - - echo "[csfx-install] partitioning complete, running nixos-install" - - nixos-install \ - --no-root-passwd \ - --flake /iso/csfx-flake#csfx-server - - echo "[csfx-install] installation complete — rebooting in 5s" - sleep 5 - reboot - ''; - - logoText = builtins.readFile ../logo.txt; - - motd = pkgs.writeText "csfx-motd" '' - ${logoText} - - ╔══════════════════════════════════════════════════════════════════╗ - ║ CSFX Node Installer ║ - ║ ║ - ║ Automatische Installation startet in 10 Sekunden. ║ - ║ CTRL+C zum Abbrechen und manuellem Eingriff. ║ - ║ ║ - ║ Nach der Installation: ║ - ║ - csfx-agent verbindet sich mit dem API Gateway ║ - ║ - Updates laufen automatisch via GitOps ║ - ║ ║ - ╚══════════════════════════════════════════════════════════════════╝ - ''; -in -{ - imports = [ - - updateUnitsModule - ]; - - system.stateVersion = "25.05"; - - isoImage.volumeID = "CSFX-NODE"; - isoImage.edition = lib.mkForce "csfx"; - isoImage.prependToMenuLabel = "CSFX Node Installer — "; - isoImage.makeEfiBootable = true; - isoImage.makeUsbBootable = true; - - isoImage.storeContents = [ - csfx.agentPackage - csfx.updaterPackage - ]; - - isoImage.contents = [ - { - source = ../../../CSFX-Infra; - target = "/csfx-flake"; - } - ]; - - boot.kernelParams = [ - "console=ttyS0,115200n8" - "console=tty0" - "quiet" - ]; - - boot.loader.timeout = lib.mkForce 10; - - networking = { - hostName = "csfx-installer"; - useDHCP = true; - firewall.enable = false; - }; - - time.timeZone = "UTC"; - - services.getty.autologinUser = lib.mkForce "root"; - - users.users.root = { - initialPassword = ""; - shell = pkgs.bash; - }; - - services.openssh = { - enable = true; - settings = { - PermitRootLogin = "yes"; - PasswordAuthentication = true; - }; - }; - - environment.etc."motd".source = motd; - - systemd.services.csfx-autoinstall = { - description = "CSFX automatic node installer"; - after = [ "network-online.target" "getty.target" ]; - wants = [ "network-online.target" ]; - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - Type = "oneshot"; - ExecStartPre = "${pkgs.coreutils}/bin/sleep 10"; - ExecStart = installScript; - StandardOutput = "journal+console"; - StandardError = "journal+console"; - }; - }; - - nix.settings = { - experimental-features = [ "nix-command" "flakes" ]; - trusted-users = [ "root" ]; - }; - - environment.systemPackages = with pkgs; [ - git - curl - parted - dosfstools - e2fsprogs - jq - vim - ]; -} diff --git a/nixos-node/modules/server-configuration.nix b/nixos-node/modules/server-configuration.nix deleted file mode 100644 index fc475e9..0000000 --- a/nixos-node/modules/server-configuration.nix +++ /dev/null @@ -1,329 +0,0 @@ -{ config, pkgs, lib, csfx, versions, ... }: - -let - updateUnitsModule = import ../../../CSFX-Infra/modules/update-units.nix; - composeDir = "/etc/csfx-core"; -in -{ - imports = [ updateUnitsModule ]; - - system.stateVersion = "25.05"; - - boot = { - loader.grub = { - enable = true; - device = "/dev/sda"; - }; - initrd.availableKernelModules = [ "ata_piix" "uhci_hcd" "virtio_pci" "virtio_scsi" "sd_mod" "sr_mod" ]; - }; - - fileSystems."/" = { - device = "/dev/disk/by-label/nixos"; - fsType = "ext4"; - }; - - fileSystems."/boot" = { - device = "/dev/disk/by-label/boot"; - fsType = "vfat"; - }; - - swapDevices = []; - - boot.kernel.sysctl = { - "vm.swappiness" = 1; - "vm.dirty_ratio" = 10; - "vm.dirty_background_ratio" = 5; - "vm.vfs_cache_pressure" = 50; - }; - - nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; - - networking = { - hostName = "csfx-node"; - useDHCP = true; - firewall = { - enable = true; - allowedTCPPorts = [ 22 8000 ]; - }; - }; - - time.timeZone = "UTC"; - - services.openssh = { - enable = true; - settings = { - PermitRootLogin = "prohibit-password"; - PasswordAuthentication = false; - }; - }; - - users.users.admin = { - isNormalUser = true; - extraGroups = [ "wheel" "docker" ]; - openssh.authorizedKeys.keys = []; - }; - - security.sudo.wheelNeedsPassword = false; - - virtualisation.docker = { - enable = true; - enableOnBoot = true; - }; - - users.users.csfx-agent = { - isSystemUser = true; - group = "csfx-agent"; - home = "/var/lib/csfxx-daemon"; - createHome = true; - }; - users.groups.csfx-agent = {}; - users.groups.csfx-updater = {}; - - systemd.tmpfiles.rules = [ - "d /var/lib/csfxx-daemon 0750 csfx-agent csfx-agent -" - "d /var/lib/csfxx 0750 csfx-agent csfx-updater -" - "f /var/lib/csfxx/update_trigger 0660 csfx-agent csfx-updater -" - "d /var/lib/csfxx-updater 0750 root root -" - "d /var/lib/csfxx-updater/infra.git 0750 root root -" - ]; - - systemd.services.csfx-agent = { - description = "CSFX Agent Daemon"; - wantedBy = [ "multi-user.target" ]; - after = [ "network-online.target" ]; - wants = [ "network-online.target" ]; - serviceConfig = { - ExecStart = "${csfx.agentPackage}/bin/csfx-agent"; - User = "csfx-agent"; - Group = "csfx-agent"; - Restart = "on-failure"; - RestartSec = "10s"; - PrivateTmp = true; - ProtectSystem = "strict"; - ReadWritePaths = [ "/var/lib/csfxx-daemon" "/var/lib/csfxx" ]; - NoNewPrivileges = true; - }; - environment = { - CSFX_GATEWAY_URL = "http://localhost:8000"; - CSFX_HEARTBEAT_INTERVAL = "60"; - RUST_LOG = "info"; - }; - }; - - systemd.services.csfx-updater = { - description = "CSFX GitOps Updater"; - wantedBy = [ "multi-user.target" ]; - after = [ "network-online.target" ]; - wants = [ "network-online.target" ]; - serviceConfig = { - ExecStart = "${csfx.updaterPackage}/bin/csfx-updater"; - Restart = "on-failure"; - RestartSec = "10s"; - StateDirectory = "csfx-updater"; - }; - environment = { - ETCD_ENDPOINTS = "http://localhost:2379"; - INFRA_REPO_GITHUB = "csfx-cloud/CSFX-Infra"; - INFRA_REPO_BRANCH = "main"; - INFRA_REPO_MIRROR_URL = "https://github.com/csfx-cloud/CSFX-Infra.git"; - INFRA_REPO_MIRROR_DIR = "/var/lib/csfxx-updater/infra.git"; - POLL_INTERVAL_SECS = "120"; - RUST_LOG = "info"; - }; - }; - - services.csfx-update-units = { - enable = true; - nixCacheUrl = "http://localhost:5000"; - nixCachePublicKey = ""; - }; - - nix.settings = { - experimental-features = [ "nix-command" "flakes" ]; - trusted-users = [ "root" ]; - }; - - system.activationScripts.csfx-core-compose = { - text = '' - mkdir -p ${composeDir} - - cat > ${composeDir}/docker-compose.yml <<'COMPOSE' -services: - etcd: - image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csfx-etcd - command: - - etcd - - --advertise-client-urls=http://0.0.0.0:2379 - - --listen-client-urls=http://0.0.0.0:2379 - - --data-dir=/etcd-data - volumes: - - etcd_data:/etcd-data - ports: - - "2379:2379" - restart: unless-stopped - - patroni: - image: ghcr.io/zalando/spilo-15:3.0-p1 - container_name: csfx-patroni - hostname: patroni - environment: - PATRONI_NAME: patroni - PATRONI_SCOPE: postgres-csfx - PATRONI_ETCD3_HOSTS: "etcd:2379" - PATRONI_ETCD3_PROTOCOL: http - PATRONI_POSTGRESQL_DATA_DIR: /home/postgres/pgdata - PATRONI_POSTGRESQL_LISTEN: "0.0.0.0:5432" - PATRONI_POSTGRESQL_CONNECT_ADDRESS: "patroni:5432" - PATRONI_REPLICATION_USERNAME: replicator - PATRONI_REPLICATION_PASSWORD: replpass - PATRONI_SUPERUSER_USERNAME: postgres - PATRONI_SUPERUSER_PASSWORD: postgrespass - PATRONI_RESTAPI_LISTEN: "0.0.0.0:8008" - PATRONI_RESTAPI_CONNECT_ADDRESS: "patroni:8008" - SPILO_CONFIGURATION: | - bootstrap: - initdb: - - auth-host: md5 - - auth-local: trust - post_bootstrap: /etc/csfx-bootstrap.sh - volumes: - - patroni_data:/home/postgres/pgdata - - /etc/csfx-core/patroni-bootstrap.sh:/etc/csfx-bootstrap.sh:ro - depends_on: - - etcd - healthcheck: - test: ["CMD-SHELL", "curl -sf http://localhost:8008/health | grep -q running || exit 1"] - interval: 10s - timeout: 5s - retries: 10 - start_period: 60s - restart: unless-stopped - - api-gateway: - image: ghcr.io/csfx-cloud/csfx-ce-api-gateway@${versions.csf.images.api-gateway.digest} - container_name: csfx-api-gateway - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - JWT_SECRET: change_me_in_production - ETCD_ENDPOINTS: http://etcd:2379 - REGISTRY_SERVICE_URL: http://registry:8001 - SCHEDULER_SERVICE_URL: http://scheduler:8002 - VOLUME_MANAGER_URL: http://volume-manager:8003 - FAILOVER_CONTROLLER_URL: http://failover-controller:8004 - SDN_CONTROLLER_URL: http://sdn-controller:8005 - RUST_LOG: info - ports: - - "8000:8000" - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - - registry: - image: ghcr.io/csfx-cloud/csfx-ce-registry@${versions.csf.images.registry.digest} - container_name: csfx-registry - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - ETCD_ENDPOINTS: http://etcd:2379 - REGISTRY_PORT: "8001" - RUST_LOG: info - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - - scheduler: - image: ghcr.io/csfx-cloud/csfx-ce-scheduler@${versions.csf.images.scheduler.digest} - container_name: csfx-scheduler - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - ETCD_ENDPOINTS: http://etcd:2379 - SCHEDULER_PORT: "8002" - RUST_LOG: info - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - - volume-manager: - image: ghcr.io/csfx-cloud/csfx-ce-volume-manager@${versions.csf.images.volume-manager.digest} - container_name: csfx-volume-manager - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - ETCD_ENDPOINTS: http://etcd:2379 - VOLUME_MANAGER_PORT: "8003" - RUST_LOG: info - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - - failover-controller: - image: ghcr.io/csfx-cloud/csfx-ce-failover-controller@${versions.csf.images.failover-controller.digest} - container_name: csfx-failover-controller - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - ETCD_ENDPOINTS: http://etcd:2379 - FAILOVER_CONTROLLER_PORT: "8004" - SCHEDULER_SERVICE_URL: http://scheduler:8002 - VOLUME_MANAGER_URL: http://volume-manager:8003 - RUST_LOG: info - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - - sdn-controller: - image: ghcr.io/csfx-cloud/csfx-ce-sdn-controller@${versions.csf.images.sdn-controller.digest} - container_name: csfx-sdn-controller - environment: - DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csfx_core - ETCD_ENDPOINTS: http://etcd:2379 - SDN_CONTROLLER_PORT: "8005" - RUST_LOG: info - depends_on: - patroni: - condition: service_healthy - restart: unless-stopped - -volumes: - etcd_data: - patroni_data: -COMPOSE - - cat > ${composeDir}/patroni-bootstrap.sh <<'BOOTSTRAP' -#!/bin/bash -psql -U postgres -c "CREATE USER csfx WITH PASSWORD 'csfpassword';" -psql -U postgres -c "CREATE DATABASE csfx_core OWNER csf;" -psql -U postgres -c "GRANT ALL PRIVILEGES ON DATABASE csfx_core TO csf;" -BOOTSTRAP - chmod +x ${composeDir}/patroni-bootstrap.sh - ''; - deps = []; - }; - - systemd.services.csfx-control-plane = { - description = "CSFX Control Plane (Docker Compose)"; - after = [ "docker.service" "network-online.target" ]; - requires = [ "docker.service" ]; - wants = [ "network-online.target" ]; - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - WorkingDirectory = composeDir; - ExecStart = "${pkgs.docker}/bin/docker compose up -d --remove-orphans"; - ExecStop = "${pkgs.docker}/bin/docker compose down"; - TimeoutStartSec = "600"; - }; - }; - - environment.systemPackages = with pkgs; [ - docker-compose - curl - git - jq - etcd - ]; -}