From cbd3a05c00b58e4c7af1beb311d1d01eebf6405f Mon Sep 17 00:00:00 2001
From: Alex Gherghisan
Date: Wed, 26 Nov 2025 17:08:20 +0000
Subject: [PATCH 1/2] chore: add script to validate changes to network_config.json

---
 spartan/scripts/diff_network_config.sh | 124 ++++++++
 spartan/scripts/pr_network_validate.sh | 399 +++++++++++++++++++++++++
 2 files changed, 523 insertions(+)
 create mode 100755 spartan/scripts/diff_network_config.sh
 create mode 100755 spartan/scripts/pr_network_validate.sh

diff --git a/spartan/scripts/diff_network_config.sh b/spartan/scripts/diff_network_config.sh
new file mode 100755
index 000000000000..da60cb5581a7
--- /dev/null
+++ b/spartan/scripts/diff_network_config.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Diffs network config between main branch and a PR branch to extract only NEW resources
+# Usage: diff_network_config.sh <pr_number> <network_name>
+
+# Basic logging helpers
+log() { echo "[INFO] $(date -Is) - $*" >&2; }
+err() { echo "[ERROR] $(date -Is) - $*" >&2; }
+die() { err "$*"; exit 1; }
+
+# Check arguments
+if [[ $# -lt 2 ]]; then
+  die "Usage: $0 <pr_number> <network_name>"
+fi
+
+PR_NUMBER="$1"
+NETWORK_NAME="$2"
+
+# URLs for fetching configs
+MAIN_CONFIG_URL="https://raw.githubusercontent.com/AztecProtocol/networks/main/network_config.json"
+PR_CONFIG_URL="https://raw.githubusercontent.com/AztecProtocol/networks/refs/pull/${PR_NUMBER}/merge/network_config.json"
+
+# Temporary files
+TMP_DIR=$(mktemp -d)
+trap "rm -rf $TMP_DIR" EXIT
+
+MAIN_CONFIG_FILE="${TMP_DIR}/main_config.json"
+PR_CONFIG_FILE="${TMP_DIR}/pr_config.json"
+DIFF_OUTPUT_FILE="${TMP_DIR}/diff_output.json"
+
+log "Fetching main branch network config..."
+if ! curl -f -s -L "$MAIN_CONFIG_URL" -o "$MAIN_CONFIG_FILE"; then
+  die "Failed to fetch main branch config from $MAIN_CONFIG_URL"
+fi
+
+log "Fetching PR #${PR_NUMBER} network config..."
+if ! curl -f -s -L "$PR_CONFIG_URL" -o "$PR_CONFIG_FILE"; then
+  die "Failed to fetch PR config from $PR_CONFIG_URL"
+fi
+
+# Validate network exists in both configs
+if ! jq -e ".${NETWORK_NAME}" "$MAIN_CONFIG_FILE" >/dev/null 2>&1; then
+  die "Network '${NETWORK_NAME}' not found in main branch config"
+fi
+
+if ! jq -e ".${NETWORK_NAME}" "$PR_CONFIG_FILE" >/dev/null 2>&1; then
+  die "Network '${NETWORK_NAME}' not found in PR config"
+fi
+
+log "Extracting network configs for '${NETWORK_NAME}'..."
+MAIN_NETWORK=$(jq ".${NETWORK_NAME}" "$MAIN_CONFIG_FILE")
+PR_NETWORK=$(jq ".${NETWORK_NAME}" "$PR_CONFIG_FILE")
+
+# Extract arrays
+MAIN_BOOTNODES=$(echo "$MAIN_NETWORK" | jq -r '.bootnodes[]' 2>/dev/null || echo "")
+PR_BOOTNODES=$(echo "$PR_NETWORK" | jq -r '.bootnodes[]' 2>/dev/null || echo "")
+
+MAIN_SNAPSHOTS=$(echo "$MAIN_NETWORK" | jq -r '.snapshots[]' 2>/dev/null || echo "")
+PR_SNAPSHOTS=$(echo "$PR_NETWORK" | jq -r '.snapshots[]' 2>/dev/null || echo "")
+
+# Find NEW bootnodes (in PR but not in main)
+log "Diffing bootnodes..."
+NEW_BOOTNODES_ARRAY="[]"
+if [[ -n "$PR_BOOTNODES" ]]; then
+  while IFS= read -r pr_bootnode; do
+    if [[ -n "$pr_bootnode" ]]; then
+      # Check if this bootnode exists in main
+      if ! echo "$MAIN_BOOTNODES" | grep -Fxq "$pr_bootnode"; then
+        NEW_BOOTNODES_ARRAY=$(echo "$NEW_BOOTNODES_ARRAY" | jq --arg bn "$pr_bootnode" '. + [$bn]')
+      fi
+    fi
+  done <<< "$PR_BOOTNODES"
+fi
+
+# Find NEW snapshots (in PR but not in main)
+log "Diffing snapshots..."
+NEW_SNAPSHOTS_ARRAY="[]"
+if [[ -n "$PR_SNAPSHOTS" ]]; then
+  while IFS= read -r pr_snapshot; do
+    if [[ -n "$pr_snapshot" ]]; then
+      # Check if this snapshot exists in main
+      if ! echo "$MAIN_SNAPSHOTS" | grep -Fxq "$pr_snapshot"; then
+        NEW_SNAPSHOTS_ARRAY=$(echo "$NEW_SNAPSHOTS_ARRAY" | jq --arg snap "$pr_snapshot" '. + [$snap]')
+      fi
+    fi
+  done <<< "$PR_SNAPSHOTS"
+fi
+
+# Extract other required fields from PR config
+REGISTRY_ADDRESS=$(echo "$PR_NETWORK" | jq -r '.registryAddress')
+L1_CHAIN_ID=$(echo "$PR_NETWORK" | jq -r '.l1ChainId')
+FEE_ASSET_HANDLER_ADDRESS=$(echo "$PR_NETWORK" | jq -r '.feeAssetHandlerAddress // ""')
+
+# Validate at least one new resource exists
+NEW_BOOTNODE_COUNT=$(echo "$NEW_BOOTNODES_ARRAY" | jq 'length')
+NEW_SNAPSHOT_COUNT=$(echo "$NEW_SNAPSHOTS_ARRAY" | jq 'length')
+
+if [[ "$NEW_BOOTNODE_COUNT" -eq 0 ]] && [[ "$NEW_SNAPSHOT_COUNT" -eq 0 ]]; then
+  die "No new bootnodes or snapshots found in PR. Nothing to validate."
+fi
+
+log "Found $NEW_BOOTNODE_COUNT new bootnode(s) and $NEW_SNAPSHOT_COUNT new snapshot(s)"
+
+# Build output JSON
+jq -n \
+  --argjson bootnodes "$NEW_BOOTNODES_ARRAY" \
+  --argjson snapshots "$NEW_SNAPSHOTS_ARRAY" \
+  --arg registry "$REGISTRY_ADDRESS" \
+  --arg l1ChainId "$L1_CHAIN_ID" \
+  --arg feeAssetHandler "$FEE_ASSET_HANDLER_ADDRESS" \
+  '{
+    new_bootnodes: $bootnodes,
+    new_snapshots: $snapshots,
+    registry_address: $registry,
+    l1_chain_id: $l1ChainId,
+    fee_asset_handler_address: $feeAssetHandler
+  }' > "$DIFF_OUTPUT_FILE"
+
+# Output the result to stdout
+cat "$DIFF_OUTPUT_FILE"
+
+log "Config diff completed successfully"
diff --git a/spartan/scripts/pr_network_validate.sh b/spartan/scripts/pr_network_validate.sh
new file mode 100755
index 000000000000..44d7f72812b7
--- /dev/null
+++ b/spartan/scripts/pr_network_validate.sh
@@ -0,0 +1,399 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Validates a network config PR by deploying 2 nodes in Kubernetes using ONLY the new resources
+# Usage: pr_network_validate.sh <pr_number> <network_name> [cluster]
+
+echo "PR Network Config Validation"
+echo "============================="
+
+spartan=$(git rev-parse --show-toplevel)/spartan
+scripts_dir=$spartan/scripts
+
+# Source required scripts
+source "$scripts_dir/source_env_basic.sh"
+source "$scripts_dir/gcp_auth.sh"
+
+# Basic logging helpers
+log() { echo "[INFO] $(date -Is) - $*"; }
+err() { echo "[ERROR] $(date -Is) - $*" >&2; }
+die() { err "$*"; exit 1; }
+
+# Check arguments
+if [[ $# -lt 2 ]]; then
+  die "Usage: $0 <pr_number> <network_name> [cluster]"
+fi
+
+PR_NUMBER="$1"
+NETWORK_NAME="$2"
+CLUSTER="${3:-${CLUSTER:-kind}}"
+
+# Configuration
+NAMESPACE="pr-validate-${PR_NUMBER}"
+RELEASE_PREFIX="pr-val-${PR_NUMBER}"
+CLEANUP="${CLEANUP:-true}"
+VALIDATION_TIMEOUT="${VALIDATION_TIMEOUT:-1800}"
+AZTEC_DOCKER_IMAGE="${AZTEC_DOCKER_IMAGE:-aztecprotocol/aztec:latest}"
+
+# Check required environment variables
+if [[ -z "${AZTEC_DOCKER_IMAGE:-}" ]]; then
+  die "AZTEC_DOCKER_IMAGE is not set. Please set it to the image you want to validate."
+fi
+
+log "Configuration:"
+log "  PR Number: ${PR_NUMBER}"
+log "  Network: ${NETWORK_NAME}"
+log "  Cluster: ${CLUSTER}"
+log "  Namespace: ${NAMESPACE}"
+log "  Image: ${AZTEC_DOCKER_IMAGE}"
+
+# Perform GCP auth if not using kind
+if [[ "${CLUSTER}" != "kind" ]]; then
+  log "Authenticating to GCP..."
+  gcp_auth
+fi
+
+# Get kubectl context
+K8S_CLUSTER_CONTEXT=$(kubectl config current-context)
+log "Using kubectl context: ${K8S_CLUSTER_CONTEXT}"
+
+# Step 1: Diff configs to get new resources
+log "Step 1: Diffing network configs..."
+DIFF_OUTPUT=$("$scripts_dir/diff_network_config.sh" "$PR_NUMBER" "$NETWORK_NAME") + +if [[ -z "$DIFF_OUTPUT" ]]; then + die "Failed to diff network configs" +fi + +log "Diff output:" +echo "$DIFF_OUTPUT" | jq '.' + +# Parse diff output +NEW_BOOTNODES=$(echo "$DIFF_OUTPUT" | jq -r '.new_bootnodes | join(",")') +NEW_SNAPSHOTS=$(echo "$DIFF_OUTPUT" | jq -r '.new_snapshots | join(",")') +REGISTRY_ADDRESS=$(echo "$DIFF_OUTPUT" | jq -r '.registry_address') +L1_CHAIN_ID=$(echo "$DIFF_OUTPUT" | jq -r '.l1_chain_id') +FEE_ASSET_HANDLER_ADDRESS=$(echo "$DIFF_OUTPUT" | jq -r '.fee_asset_handler_address // ""') + +log "New resources to validate:" +log " Bootnodes: ${NEW_BOOTNODES:-}" +log " Snapshots: ${NEW_SNAPSHOTS:-}" + +# Validate at least one new resource +if [[ -z "$NEW_BOOTNODES" ]] && [[ -z "$NEW_SNAPSHOTS" ]]; then + die "No new bootnodes or snapshots to validate" +fi + +# Step 2: Create namespace +log "Step 2: Creating namespace ${NAMESPACE}..." +if kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1; then + log "Namespace ${NAMESPACE} already exists. Deleting..." + kubectl delete namespace "${NAMESPACE}" --wait=true --timeout=60s || true +fi +kubectl create namespace "${NAMESPACE}" + +# Step 3: Get L1 configuration +log "Step 3: Setting up L1 configuration..." + +# Determine L1 network based on the Aztec network +# mainnet uses mainnet L1, everything else uses sepolia +if [[ "$NETWORK_NAME" == "mainnet" ]]; then + L1_NETWORK="mainnet" +else + L1_NETWORK="sepolia" +fi + +# Fetch L1 endpoints from GCP secrets if not already set +if [[ -z "${ETHEREUM_RPC_URLS:-}" ]] && [[ -z "${L1_RPC_URLS:-}" ]]; then + log "Fetching L1 RPC URLs from GCP secret: ${L1_NETWORK}-rpc-urls" + ETHEREUM_RPC_URLS=$(gcloud secrets versions access latest --secret="${L1_NETWORK}-rpc-urls" --project="${GCP_PROJECT_ID:-testnet-440309}" 2>/dev/null || echo "") + if [[ -z "$ETHEREUM_RPC_URLS" ]]; then + die "Failed to fetch ${L1_NETWORK}-rpc-urls from GCP secrets. Ensure you're authenticated." + fi +fi + +if [[ -z "${ETHEREUM_CONSENSUS_HOST_URLS:-}" ]] && [[ -z "${L1_CONSENSUS_HOST_URLS:-}" ]]; then + log "Fetching L1 Consensus URLs from GCP secret: ${L1_NETWORK}-consensus-host-urls" + ETHEREUM_CONSENSUS_HOST_URLS=$(gcloud secrets versions access latest --secret="${L1_NETWORK}-consensus-host-urls" --project="${GCP_PROJECT_ID:-testnet-440309}" 2>/dev/null || echo "") + if [[ -z "$ETHEREUM_CONSENSUS_HOST_URLS" ]]; then + die "Failed to fetch ${L1_NETWORK}-consensus-host-urls from GCP secrets. Ensure you're authenticated." + fi +fi + +# L1 endpoints - required for nodes to sync +L1_RPC_URLS="${ETHEREUM_RPC_URLS:-${L1_RPC_URLS:-}}" +L1_CONSENSUS_URLS="${ETHEREUM_CONSENSUS_HOST_URLS:-${L1_CONSENSUS_HOST_URLS:-}}" + +if [[ -z "$L1_RPC_URLS" ]]; then + die "L1_RPC_URLS or ETHEREUM_RPC_URLS must be set" +fi + +if [[ -z "$L1_CONSENSUS_URLS" ]]; then + die "L1_CONSENSUS_URLS or ETHEREUM_CONSENSUS_HOST_URLS must be set" +fi + +# Convert JSON arrays to comma-separated if needed +if [[ "$L1_RPC_URLS" == "["* ]]; then + L1_RPC_URLS=$(echo "$L1_RPC_URLS" | jq -r 'join(",")') +fi + +if [[ "$L1_CONSENSUS_URLS" == "["* ]]; then + L1_CONSENSUS_URLS=$(echo "$L1_CONSENSUS_URLS" | jq -r 'join(",")') +fi + +log " L1 RPC URLs: ${L1_RPC_URLS}" +log " L1 Consensus URLs: ${L1_CONSENSUS_URLS}" + +# Step 4: Create Helm values file +log "Step 4: Creating Helm values..." 
+TMP_DIR=$(mktemp -d) +trap "rm -rf $TMP_DIR" EXIT + +HELM_VALUES_FILE="${TMP_DIR}/pr-validate-values.yaml" + +cat > "$HELM_VALUES_FILE" << EOF +replicaCount: 2 + +global: + aztecNetwork: "" # Don't use predefined network + customAztecNetwork: + l1ChainId: ${L1_CHAIN_ID} + registryContractAddress: "${REGISTRY_ADDRESS}" + feeAssetHandlerContractAddress: "${FEE_ASSET_HANDLER_ADDRESS}" + + l1ExecutionUrls: +$(echo "$L1_RPC_URLS" | tr ',' '\n' | while read url; do echo " - \"$url\""; done) + + l1ConsensusUrls: +$(echo "$L1_CONSENSUS_URLS" | tr ',' '\n' | while read url; do echo " - \"$url\""; done) + + aztecImage: + repository: $(echo "$AZTEC_DOCKER_IMAGE" | cut -d: -f1) + tag: "$(echo "$AZTEC_DOCKER_IMAGE" | cut -d: -f2)" + pullPolicy: IfNotPresent + + sponsoredFPC: false + testAccounts: false + +node: + logLevel: "debug" + + startCmd: + - --node + - --archiver + + env: + # Override with ONLY new resources - this is the key to isolated testing + NETWORK_CONFIG_LOCATION: "" # Disable remote network config fetch + BOOTSTRAP_NODES: "${NEW_BOOTNODES}" + SYNC_SNAPSHOTS_URLS: "${NEW_SNAPSHOTS}" + SYNC_MODE: force-snapshot + L1_CHAIN_ID: "${L1_CHAIN_ID}" + REGISTRY_CONTRACT_ADDRESS: "${REGISTRY_ADDRESS}" + FEE_ASSET_HANDLER_CONTRACT_ADDRESS: "${FEE_ASSET_HANDLER_ADDRESS:-0x0000000000000000000000000000000000000000}" + LOG_LEVEL: "debug" +EOF + +log "Helm values created:" +cat "$HELM_VALUES_FILE" + +# Step 5: Deploy with Helm +log "Step 5: Deploying validation nodes..." +helm upgrade --install \ + "${RELEASE_PREFIX}" \ + "$spartan/aztec-node" \ + --namespace "${NAMESPACE}" \ + --values "$HELM_VALUES_FILE" \ + --timeout 15m + +log "Deployment complete. Waiting for pods to start..." + +# Step 6: Wait for pods to exist (not Ready - we need to check logs immediately) +log "Step 6: Waiting for pods to be created..." +for i in {1..60}; do + POD_COUNT=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/instance=${RELEASE_PREFIX}" --no-headers 2>/dev/null | wc -l) + if [[ "$POD_COUNT" -ge 2 ]]; then + log "Pods created!" + break + fi + if [[ $i -eq 60 ]]; then + die "Timeout waiting for pods to be created" + fi + sleep 2 +done + +# Get pod names +POD_0=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/instance=${RELEASE_PREFIX}" -o jsonpath='{.items[0].metadata.name}') +POD_1=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/instance=${RELEASE_PREFIX}" -o jsonpath='{.items[1].metadata.name}') + +log "Validation pods: ${POD_0}, ${POD_1}" +log "Starting validation checks (will monitor logs as pods start)..." + +# Step 7: Validation checks +log "Step 7: Running validation checks..." 
+ +VALIDATION_START=$(date +%s) +SUCCESS=false + +# Function to check logs for P2P connection +check_p2p_connection() { + local pod=$1 + kubectl logs -n "${NAMESPACE}" "$pod" --tail=100 2>/dev/null | grep -i "peer.*connected\|discovered peer" || true +} + +# Function to check logs for snapshot download +check_snapshot_download() { + local pod=$1 + kubectl logs -n "${NAMESPACE}" "$pod" --tail=100 2>/dev/null | grep -i "snapshot.*download\|syncing from snapshot\|downloading snapshot" || true +} + +# Function to check if node is syncing from L1 (fallback - should fail if snapshots expected) +check_l1_sync() { + local pod=$1 + kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null | grep -i "syncing from l1\|catching up from l1\|starting archiver\|archiver sync" || true +} + +# Function to check for snapshot failures +check_snapshot_failure() { + local pod=$1 + # Look for the critical failure message that means all snapshots failed + kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null | grep -i "No valid snapshots found from any URL, skipping snapshot sync\|No snapshot found at.*Skipping this URL\|Fetching.*failed\. Will retry" || true +} + +# Function to check node status +check_node_status() { + local pod=$1 + kubectl exec -n "${NAMESPACE}" "$pod" -- curl -s http://localhost:8080/status || echo "{}" +} + +log "Monitoring validation (timeout: ${VALIDATION_TIMEOUT}s)..." + +while true; do + CURRENT_TIME=$(date +%s) + ELAPSED=$((CURRENT_TIME - VALIDATION_START)) + + if [[ $ELAPSED -gt $VALIDATION_TIMEOUT ]]; then + err "Validation timeout reached (${VALIDATION_TIMEOUT}s)" + break + fi + + log "Check iteration (${ELAPSED}s elapsed)..." + + # CRITICAL CHECK: If new snapshots exist, ensure nodes are NOT syncing from L1 + if [[ -n "$NEW_SNAPSHOTS" ]]; then + log " Checking for L1 sync fallback (should NOT happen with new snapshots)..." + L1_SYNC_POD_0=$(check_l1_sync "$POD_0") + L1_SYNC_POD_1=$(check_l1_sync "$POD_1") + + if [[ -n "$L1_SYNC_POD_0" ]] || [[ -n "$L1_SYNC_POD_1" ]]; then + err " ✗ FAILURE: Nodes are syncing from L1 instead of using snapshots!" + if [[ -n "$L1_SYNC_POD_0" ]]; then + err " Pod 0: $L1_SYNC_POD_0" + fi + if [[ -n "$L1_SYNC_POD_1" ]]; then + err " Pod 1: $L1_SYNC_POD_1" + fi + err " This indicates the snapshot URL is broken or unreachable." + SUCCESS=false + break + fi + + # Check for explicit snapshot failures + SNAPSHOT_FAIL_POD_0=$(check_snapshot_failure "$POD_0") + SNAPSHOT_FAIL_POD_1=$(check_snapshot_failure "$POD_1") + + if [[ -n "$SNAPSHOT_FAIL_POD_0" ]] || [[ -n "$SNAPSHOT_FAIL_POD_1" ]]; then + err " ✗ FAILURE: Snapshot download failed!" + if [[ -n "$SNAPSHOT_FAIL_POD_0" ]]; then + err " Pod 0: $SNAPSHOT_FAIL_POD_0" + fi + if [[ -n "$SNAPSHOT_FAIL_POD_1" ]]; then + err " Pod 1: $SNAPSHOT_FAIL_POD_1" + fi + SUCCESS=false + break + fi + fi + + # Check 1: P2P discovery + log " Checking P2P connections..." + P2P_POD_0=$(check_p2p_connection "$POD_0") + P2P_POD_1=$(check_p2p_connection "$POD_1") + + if [[ -n "$P2P_POD_0" ]] || [[ -n "$P2P_POD_1" ]]; then + log " ✓ P2P connection detected!" + if [[ -n "$P2P_POD_0" ]]; then + log " Pod 0: $P2P_POD_0" + fi + if [[ -n "$P2P_POD_1" ]]; then + log " Pod 1: $P2P_POD_1" + fi + + # If we only have new snapshots (no new bootnodes), P2P is not required + if [[ -n "$NEW_BOOTNODES" ]]; then + SUCCESS=true + break + fi + fi + + # Check 2: Snapshot download (if new snapshots exist) + if [[ -n "$NEW_SNAPSHOTS" ]]; then + log " Checking snapshot downloads..." 
+ SNAPSHOT_POD_0=$(check_snapshot_download "$POD_0") + SNAPSHOT_POD_1=$(check_snapshot_download "$POD_1") + + if [[ -n "$SNAPSHOT_POD_0" ]] || [[ -n "$SNAPSHOT_POD_1" ]]; then + log " ✓ Snapshot download detected!" + if [[ -n "$SNAPSHOT_POD_0" ]]; then + log " Pod 0: $SNAPSHOT_POD_0" + fi + if [[ -n "$SNAPSHOT_POD_1" ]]; then + log " Pod 1: $SNAPSHOT_POD_1" + fi + SUCCESS=true + break + fi + fi + + log " Waiting 10s before next check..." + sleep 10 +done + +# Step 8: Report results +log "Step 8: Validation complete" + +if [[ "$SUCCESS" == "true" ]]; then + log "✓ VALIDATION PASSED" + log " - New bootnodes: ${NEW_BOOTNODES:-}" + log " - New snapshots: ${NEW_SNAPSHOTS:-}" + log " - Nodes successfully used new resources" +else + err "✗ VALIDATION FAILED" + err " - Could not verify nodes are using new resources" + err " - Check logs below for details" + + # Dump logs for debugging + log "Pod 0 logs (last 50 lines):" + kubectl logs -n "${NAMESPACE}" "$POD_0" --tail=50 || true + + log "Pod 1 logs (last 50 lines):" + kubectl logs -n "${NAMESPACE}" "$POD_1" --tail=50 || true +fi + +# Step 9: Cleanup +if [[ "$CLEANUP" == "true" ]]; then + log "Step 9: Cleaning up namespace ${NAMESPACE}..." + kubectl delete namespace "${NAMESPACE}" --wait=true --timeout=60s || true + log "Cleanup complete" +else + log "Step 9: Skipping cleanup (CLEANUP=false)" + log " To clean up manually: kubectl delete namespace ${NAMESPACE}" +fi + +# Exit with appropriate code +if [[ "$SUCCESS" == "true" ]]; then + exit 0 +else + exit 1 +fi From 35490ba1878339c7c3fc7000aa93b954fcd330b2 Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Thu, 27 Nov 2025 13:17:54 +0000 Subject: [PATCH 2/2] chore: update script --- spartan/scripts/pr_network_validate.sh | 235 +++++++++++++------------ 1 file changed, 125 insertions(+), 110 deletions(-) diff --git a/spartan/scripts/pr_network_validate.sh b/spartan/scripts/pr_network_validate.sh index 44d7f72812b7..3cb4b8f66cfb 100755 --- a/spartan/scripts/pr_network_validate.sh +++ b/spartan/scripts/pr_network_validate.sh @@ -58,8 +58,7 @@ fi K8S_CLUSTER_CONTEXT=$(kubectl config current-context) log "Using kubectl context: ${K8S_CLUSTER_CONTEXT}" -# Step 1: Diff configs to get new resources -log "Step 1: Diffing network configs..." +log "Diffing network configs..." DIFF_OUTPUT=$("$scripts_dir/diff_network_config.sh" "$PR_NUMBER" "$NETWORK_NAME") if [[ -z "$DIFF_OUTPUT" ]]; then @@ -85,16 +84,14 @@ if [[ -z "$NEW_BOOTNODES" ]] && [[ -z "$NEW_SNAPSHOTS" ]]; then die "No new bootnodes or snapshots to validate" fi -# Step 2: Create namespace -log "Step 2: Creating namespace ${NAMESPACE}..." +log "Creating namespace ${NAMESPACE}..." if kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1; then log "Namespace ${NAMESPACE} already exists. Deleting..." kubectl delete namespace "${NAMESPACE}" --wait=true --timeout=60s || true fi kubectl create namespace "${NAMESPACE}" -# Step 3: Get L1 configuration -log "Step 3: Setting up L1 configuration..." +log "Setting up L1 configuration..." # Determine L1 network based on the Aztec network # mainnet uses mainnet L1, everything else uses sepolia @@ -145,8 +142,7 @@ fi log " L1 RPC URLs: ${L1_RPC_URLS}" log " L1 Consensus URLs: ${L1_CONSENSUS_URLS}" -# Step 4: Create Helm values file -log "Step 4: Creating Helm values..." +log "Creating Helm values..." 
TMP_DIR=$(mktemp -d) trap "rm -rf $TMP_DIR" EXIT @@ -183,6 +179,13 @@ node: - --node - --archiver + # Enable P2P with node port to get public IP + p2p: + enabled: true + publicIP: true + port: 40400 + announcePort: 40400 + env: # Override with ONLY new resources - this is the key to isolated testing NETWORK_CONFIG_LOCATION: "" # Disable remote network config fetch @@ -198,8 +201,7 @@ EOF log "Helm values created:" cat "$HELM_VALUES_FILE" -# Step 5: Deploy with Helm -log "Step 5: Deploying validation nodes..." +log "Deploying validation nodes..." helm upgrade --install \ "${RELEASE_PREFIX}" \ "$spartan/aztec-node" \ @@ -209,8 +211,7 @@ helm upgrade --install \ log "Deployment complete. Waiting for pods to start..." -# Step 6: Wait for pods to exist (not Ready - we need to check logs immediately) -log "Step 6: Waiting for pods to be created..." +log "Waiting for pods to be created..." for i in {1..60}; do POD_COUNT=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/instance=${RELEASE_PREFIX}" --no-headers 2>/dev/null | wc -l) if [[ "$POD_COUNT" -ge 2 ]]; then @@ -230,35 +231,74 @@ POD_1=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/instance=${RELE log "Validation pods: ${POD_0}, ${POD_1}" log "Starting validation checks (will monitor logs as pods start)..." -# Step 7: Validation checks -log "Step 7: Running validation checks..." +log "Running validation checks..." VALIDATION_START=$(date +%s) -SUCCESS=false +P2P_SUCCESS=false +SNAPSHOT_SUCCESS=false + +# Determine what we need to validate +NEED_P2P_CHECK=false +NEED_SNAPSHOT_CHECK=false + +if [[ -n "$NEW_BOOTNODES" ]]; then + NEED_P2P_CHECK=true + log "Will validate P2P discovery via new bootnode(s)" +fi + +if [[ -n "$NEW_SNAPSHOTS" ]]; then + NEED_SNAPSHOT_CHECK=true + log "Will validate snapshot download from new URL(s)" +fi # Function to check logs for P2P connection check_p2p_connection() { local pod=$1 - kubectl logs -n "${NAMESPACE}" "$pod" --tail=100 2>/dev/null | grep -i "peer.*connected\|discovered peer" || true + # Look for actual peer connections, not "Connected to 0 peers" + # Match patterns like: + # - "Connected to X peers" where X > 0 + # - "peer abc123 connected" + # - "discovered peer xyz789" + local logs=$(kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null) + + # Check for "Connected to X peers" where X > 0 + if echo "$logs" | grep -qE "Connected to [1-9][0-9]* peer"; then + echo "$logs" | grep -E "Connected to [1-9][0-9]* peer" + return 0 + fi + + # Check for specific peer connection messages (but exclude "Connected to 0 peers") + echo "$logs" | grep -iE "(peer [a-zA-Z0-9]+ connected|discovered peer [a-zA-Z0-9]+|connection established.*peer)" | grep -v "Connected to 0 peer" || true } # Function to check logs for snapshot download +# Returns: "success", "failed", or "unknown" check_snapshot_download() { local pod=$1 - kubectl logs -n "${NAMESPACE}" "$pod" --tail=100 2>/dev/null | grep -i "snapshot.*download\|syncing from snapshot\|downloading snapshot" || true -} + local logs=$(kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null) -# Function to check if node is syncing from L1 (fallback - should fail if snapshots expected) -check_l1_sync() { - local pod=$1 - kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null | grep -i "syncing from l1\|catching up from l1\|starting archiver\|archiver sync" || true -} + # Check for failure messages first + if echo "$logs" | grep -qi "No valid snapshots found from any URL, skipping snapshot sync"; then + echo "failed" + return + fi -# Function 
to check for snapshot failures -check_snapshot_failure() { - local pod=$1 - # Look for the critical failure message that means all snapshots failed - kubectl logs -n "${NAMESPACE}" "$pod" --tail=200 2>/dev/null | grep -i "No valid snapshots found from any URL, skipping snapshot sync\|No snapshot found at.*Skipping this URL\|Fetching.*failed\. Will retry" || true + if echo "$logs" | grep -qi "No snapshot found at.*Skipping this URL"; then + # This might be trying multiple URLs, check if ALL failed + if echo "$logs" | grep -qi "No valid snapshots found from any URL"; then + echo "failed" + return + fi + fi + + # Check for success messages + if echo "$logs" | grep -qi "snapshot.*download\|syncing from snapshot\|downloading snapshot"; then + echo "success" + return + fi + + # Neither success nor failure detected yet + echo "unknown" } # Function to check node status @@ -280,119 +320,94 @@ while true; do log "Check iteration (${ELAPSED}s elapsed)..." - # CRITICAL CHECK: If new snapshots exist, ensure nodes are NOT syncing from L1 - if [[ -n "$NEW_SNAPSHOTS" ]]; then - log " Checking for L1 sync fallback (should NOT happen with new snapshots)..." - L1_SYNC_POD_0=$(check_l1_sync "$POD_0") - L1_SYNC_POD_1=$(check_l1_sync "$POD_1") + if [[ "$NEED_SNAPSHOT_CHECK" == "true" ]] && [[ "$SNAPSHOT_SUCCESS" == "false" ]]; then + log " Checking snapshot downloads..." + SNAPSHOT_STATUS_POD_0=$(check_snapshot_download "$POD_0") + SNAPSHOT_STATUS_POD_1=$(check_snapshot_download "$POD_1") - if [[ -n "$L1_SYNC_POD_0" ]] || [[ -n "$L1_SYNC_POD_1" ]]; then - err " ✗ FAILURE: Nodes are syncing from L1 instead of using snapshots!" - if [[ -n "$L1_SYNC_POD_0" ]]; then - err " Pod 0: $L1_SYNC_POD_0" + if [[ "$SNAPSHOT_STATUS_POD_0" == "failed" ]] || [[ "$SNAPSHOT_STATUS_POD_1" == "failed" ]]; then + err " ✗ FAILURE: Snapshot download failed!" + if [[ "$SNAPSHOT_STATUS_POD_0" == "failed" ]]; then + err " Pod 0: Snapshot failed" + kubectl logs -n "${NAMESPACE}" "$POD_0" --tail=50 2>/dev/null | grep -i "snapshot\|No valid snapshots" | while IFS= read -r line; do + err " $line" + done fi - if [[ -n "$L1_SYNC_POD_1" ]]; then - err " Pod 1: $L1_SYNC_POD_1" + if [[ "$SNAPSHOT_STATUS_POD_1" == "failed" ]]; then + err " Pod 1: Snapshot failed" + kubectl logs -n "${NAMESPACE}" "$POD_1" --tail=50 2>/dev/null | grep -i "snapshot\|No valid snapshots" | while IFS= read -r line; do + err " $line" + done fi - err " This indicates the snapshot URL is broken or unreachable." - SUCCESS=false break fi - # Check for explicit snapshot failures - SNAPSHOT_FAIL_POD_0=$(check_snapshot_failure "$POD_0") - SNAPSHOT_FAIL_POD_1=$(check_snapshot_failure "$POD_1") - - if [[ -n "$SNAPSHOT_FAIL_POD_0" ]] || [[ -n "$SNAPSHOT_FAIL_POD_1" ]]; then - err " ✗ FAILURE: Snapshot download failed!" - if [[ -n "$SNAPSHOT_FAIL_POD_0" ]]; then - err " Pod 0: $SNAPSHOT_FAIL_POD_0" + if [[ "$SNAPSHOT_STATUS_POD_0" == "success" ]] || [[ "$SNAPSHOT_STATUS_POD_1" == "success" ]]; then + log " ✓ Snapshot download detected!" 
+ if [[ "$SNAPSHOT_STATUS_POD_0" == "success" ]]; then + log " Pod 0: Snapshot download started" + kubectl logs -n "${NAMESPACE}" "$POD_0" --tail=50 2>/dev/null | grep -i "snapshot.*download\|syncing from snapshot" | while IFS= read -r line; do + log " $line" + done fi - if [[ -n "$SNAPSHOT_FAIL_POD_1" ]]; then - err " Pod 1: $SNAPSHOT_FAIL_POD_1" + if [[ "$SNAPSHOT_STATUS_POD_1" == "success" ]]; then + log " Pod 1: Snapshot download started" + kubectl logs -n "${NAMESPACE}" "$POD_1" --tail=50 2>/dev/null | grep -i "snapshot.*download\|syncing from snapshot" | while IFS= read -r line; do + log " $line" + done fi - SUCCESS=false - break + SNAPSHOT_SUCCESS=true fi - fi - - # Check 1: P2P discovery - log " Checking P2P connections..." - P2P_POD_0=$(check_p2p_connection "$POD_0") - P2P_POD_1=$(check_p2p_connection "$POD_1") - if [[ -n "$P2P_POD_0" ]] || [[ -n "$P2P_POD_1" ]]; then - log " ✓ P2P connection detected!" - if [[ -n "$P2P_POD_0" ]]; then - log " Pod 0: $P2P_POD_0" - fi - if [[ -n "$P2P_POD_1" ]]; then - log " Pod 1: $P2P_POD_1" - fi - - # If we only have new snapshots (no new bootnodes), P2P is not required - if [[ -n "$NEW_BOOTNODES" ]]; then - SUCCESS=true - break + if [[ "$SNAPSHOT_STATUS_POD_0" == "unknown" ]] && [[ "$SNAPSHOT_STATUS_POD_1" == "unknown" ]]; then + log " Snapshot status: still waiting..." fi fi - # Check 2: Snapshot download (if new snapshots exist) - if [[ -n "$NEW_SNAPSHOTS" ]]; then - log " Checking snapshot downloads..." - SNAPSHOT_POD_0=$(check_snapshot_download "$POD_0") - SNAPSHOT_POD_1=$(check_snapshot_download "$POD_1") - if [[ -n "$SNAPSHOT_POD_0" ]] || [[ -n "$SNAPSHOT_POD_1" ]]; then - log " ✓ Snapshot download detected!" - if [[ -n "$SNAPSHOT_POD_0" ]]; then - log " Pod 0: $SNAPSHOT_POD_0" + if [[ "$NEED_P2P_CHECK" == "true" ]] && [[ "$P2P_SUCCESS" == "false" ]]; then + log " Checking P2P connections..." + P2P_POD_0=$(check_p2p_connection "$POD_0") + P2P_POD_1=$(check_p2p_connection "$POD_1") + + if [[ -n "$P2P_POD_0" ]] || [[ -n "$P2P_POD_1" ]]; then + log " ✓ P2P connection detected!" + if [[ -n "$P2P_POD_0" ]]; then + log " Pod 0 logs:" + echo "$P2P_POD_0" | while IFS= read -r line; do + log " $line" + done fi - if [[ -n "$SNAPSHOT_POD_1" ]]; then - log " Pod 1: $SNAPSHOT_POD_1" + if [[ -n "$P2P_POD_1" ]]; then + log " Pod 1 logs:" + echo "$P2P_POD_1" | while IFS= read -r line; do + log " $line" + done fi - SUCCESS=true - break + P2P_SUCCESS=true fi fi + if [[ "$NEED_P2P_CHECK" == "false" || "$P2P_SUCCESS" == "true" ]] && [[ "$NEED_SNAPSHOT_CHECK" == "false" || "$SNAPSHOT_SUCCESS" == "true" ]]; then + log " ✓ All validation checks passed!" + break + fi + log " Waiting 10s before next check..." sleep 10 done -# Step 8: Report results -log "Step 8: Validation complete" - -if [[ "$SUCCESS" == "true" ]]; then - log "✓ VALIDATION PASSED" - log " - New bootnodes: ${NEW_BOOTNODES:-}" - log " - New snapshots: ${NEW_SNAPSHOTS:-}" - log " - Nodes successfully used new resources" -else - err "✗ VALIDATION FAILED" - err " - Could not verify nodes are using new resources" - err " - Check logs below for details" - - # Dump logs for debugging - log "Pod 0 logs (last 50 lines):" - kubectl logs -n "${NAMESPACE}" "$POD_0" --tail=50 || true - - log "Pod 1 logs (last 50 lines):" - kubectl logs -n "${NAMESPACE}" "$POD_1" --tail=50 || true -fi -# Step 9: Cleanup if [[ "$CLEANUP" == "true" ]]; then - log "Step 9: Cleaning up namespace ${NAMESPACE}..." + log "Cleaning up namespace ${NAMESPACE}..." 
kubectl delete namespace "${NAMESPACE}" --wait=true --timeout=60s || true log "Cleanup complete" else - log "Step 9: Skipping cleanup (CLEANUP=false)" + log "Skipping cleanup (CLEANUP=false)" log " To clean up manually: kubectl delete namespace ${NAMESPACE}" fi -# Exit with appropriate code -if [[ "$SUCCESS" == "true" ]]; then +if [[ "$NEED_P2P_CHECK" == "false" || "$P2P_SUCCESS" == "true" ]] && [[ "$NEED_SNAPSHOT_CHECK" == "false" || "$SNAPSHOT_SUCCESS" == "true" ]]; then exit 0 else exit 1
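---

Example usage (illustrative only: the PR number, network name, and the JSON values sketched below are hypothetical placeholders, not taken from a real PR; the output keys match the jq -n call in diff_network_config.sh and the environment overrides are the ones defined at the top of pr_network_validate.sh):

# Inspect which resources a networks PR adds; the script prints a JSON summary to stdout.
./spartan/scripts/diff_network_config.sh 1234 testnet
# {
#   "new_bootnodes": ["enr:-..."],
#   "new_snapshots": ["https://example.com/snapshots/testnet"],
#   "registry_address": "0x...",
#   "l1_chain_id": "11155111",
#   "fee_asset_handler_address": "0x..."
# }

# Run the full validation against a kind cluster, keeping the namespace around afterwards
# for inspection (CLEANUP=false) and with a shorter timeout than the 1800s default.
CLEANUP=false VALIDATION_TIMEOUT=900 AZTEC_DOCKER_IMAGE=aztecprotocol/aztec:latest \
  ./spartan/scripts/pr_network_validate.sh 1234 testnet kind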