From f76a83ad3a73373860dc9e842653e5d65f9d68fd Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 14:03:10 -0700 Subject: [PATCH 1/7] feat(controller/node): resolve label peers to NLB addresses + compose node_id controller-side MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move node_id resolution for the LabelPeerSource flow from the sidecar's :26657/status query to the controller's existing per-peer sidecar gRPC. Status.ResolvedPeers wire format shifts to fully-composed @: strings. Why: the sidecar's :26657/status path is in-cluster-only (NLBs only forward 26656). After PR #365, a peer's Spec.ExternalAddress is the publishable vanity hostname. Using that address in ResolvedPeers means the sidecar can no longer query :26657 to learn node_id externally. The controller already has clean in-cluster access to each peer's sidecar gRPC (port 7777) — same path the genesis CollectAndSetPeers uses to learn node_id from node_key.json. Changes: - internal/controller/node/peers.go: extend resolveLabelPeers to call the peer's sidecar via r.Planner.BuildSidecarClient + GetNodeID, and compose @:. host is Spec.ExternalAddress when set, else headless Service DNS at p2p port. On per-peer failure the whole resolve errors and controller-runtime retries — matches the genesis path semantics. - internal/planner/planner.go: Label sources route to PeerSourceStatic (same source path the genesis-Static flow already uses) instead of PeerSourceDNSEndpoints. The sidecar's DNSEndpointsSource is unchanged and still serves EC2Tags peers. - internal/task/task.go: widen task.SidecarClient interface with GetNodeID(ctx) (string, error). The full *sidecar.SidecarClient already implements this; mocks in tests get one-line stubs. - api/v1alpha1/seinode_types.go: ResolvedPeers godoc updated for the new composed wire format. Tests: - New TestReconcilePeers_PrefersExternalAddress covers the Spec.ExternalAddress branch. - Existing peers tests updated for the new composed format. - Envtest StubSidecarClient gains a GetNodeID stub returning "stub-node-id". Suite still passes (~64s). Wire-format change to Status.ResolvedPeers is the only one-way door. The field was previously bare hosts; consumers are the planner only, which is updated in this same PR. Refs sei-protocol/sei-k8s-controller#368 Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 7 +-- config/crd/sei.io_seinodes.yaml | 7 +-- internal/controller/node/peers.go | 33 ++++++++++-- internal/controller/node/peers_test.go | 54 ++++++++++++++++--- .../controller/node/plan_execution_test.go | 7 +++ internal/controller/node/reconciler_test.go | 6 ++- .../nodedeployment/envtest/stubs.go | 6 +++ .../controller/nodetask/controller_test.go | 3 +- internal/planner/executor_test.go | 4 ++ internal/planner/planner.go | 7 ++- internal/planner/sidecar_probe_test.go | 3 +- internal/task/task.go | 7 +-- manifests/sei.io_seinodes.yaml | 7 +-- 13 files changed, 122 insertions(+), 29 deletions(-) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 07b98c35..85fd0b17 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -347,9 +347,10 @@ type SeiNodeStatus struct { // +optional Plan *TaskPlan `json:"plan,omitempty"` - // ResolvedPeers is the current set of peer DNS hostnames discovered - // from label-based peer sources. Reconciled continuously so that - // future peer-update plans can detect drift. + // ResolvedPeers is the current set of `@:` peer + // strings discovered from label-based peer sources, ready to write + // into CometBFT's persistent_peers verbatim. Host is the peer's + // Spec.ExternalAddress when set, otherwise the headless Service DNS. // +optional ResolvedPeers []string `json:"resolvedPeers,omitempty"` diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 8b2f86e2..761b419a 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -957,9 +957,10 @@ spec: type: object resolvedPeers: description: |- - ResolvedPeers is the current set of peer DNS hostnames discovered - from label-based peer sources. Reconciled continuously so that - future peer-update plans can detect drift. + ResolvedPeers is the current set of `@:` peer + strings discovered from label-based peer sources, ready to write + into CometBFT's persistent_peers verbatim. Host is the peer's + Spec.ExternalAddress when set, otherwise the headless Service DNS. items: type: string type: array diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 91f4e8ec..ea3d6877 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -5,6 +5,7 @@ import ( "fmt" "slices" + seiconfig "github.com/sei-protocol/sei-config" "sigs.k8s.io/controller-runtime/pkg/client" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" @@ -32,8 +33,11 @@ func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha return nil } -// resolveLabelPeers lists SeiNode resources matching the label selector -// and returns their stable headless Service DNS hostnames. +// resolveLabelPeers lists SeiNodes matching the selector and returns +// fully-composed `@:` strings. The host is each +// peer's Spec.ExternalAddress when set, otherwise the headless Service +// DNS. node_id is fetched from each peer's sidecar via gRPC — the same +// in-cluster path the genesis CollectAndSetPeers task uses. func (r *SeiNodeReconciler) resolveLabelPeers( ctx context.Context, node *seiv1alpha1.SeiNode, @@ -58,9 +62,28 @@ func (r *SeiNodeReconciler) resolveLabelPeers( if peer.Name == node.Name && peer.Namespace == node.Namespace { continue } - dns := fmt.Sprintf("%s-0.%s.%s.svc.cluster.local", - peer.Name, peer.Name, peer.Namespace) - endpoints = append(endpoints, dns) + + sc, err := r.Planner.BuildSidecarClient(peer) + if err != nil { + return nil, fmt.Errorf("building sidecar client for peer %s: %w", peer.Name, err) + } + nodeID, err := sc.GetNodeID(ctx) + if err != nil { + return nil, fmt.Errorf("fetching node_id for peer %s: %w", peer.Name, err) + } + + endpoints = append(endpoints, fmt.Sprintf("%s@%s", nodeID, peerAddress(peer))) } return endpoints, nil } + +// peerAddress is the dial address for a peer: Spec.ExternalAddress (already +// host:port from the SND publishable path) when set, otherwise the headless +// Service DNS at the standard P2P port. +func peerAddress(peer *seiv1alpha1.SeiNode) string { + if peer.Spec.ExternalAddress != "" { + return peer.Spec.ExternalAddress + } + return fmt.Sprintf("%s-0.%s.%s.svc.cluster.local:%d", + peer.Name, peer.Name, peer.Namespace, seiconfig.PortP2P) +} diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index 37a8a706..81620161 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -9,6 +9,8 @@ import ( seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) +const testRoleLabel = "role" + func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "my-node", Namespace: "default"}, @@ -49,8 +51,8 @@ func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { t.Fatalf("expected 2 resolved peers, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) } want := []string{ - "peer-1-0.peer-1.default.svc.cluster.local", - "peer-2-0.peer-2.default.svc.cluster.local", + "mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656", + "mock-node-id@peer-2-0.peer-2.default.svc.cluster.local:26656", } for i, w := range want { if node.Status.ResolvedPeers[i] != w { @@ -59,6 +61,46 @@ func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { } } +func TestReconcilePeers_PrefersExternalAddress(t *testing.T) { + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: "consumer", Namespace: "default"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + Peers: []seiv1alpha1.PeerSource{ + {Label: &seiv1alpha1.LabelPeerSource{ + Selector: map[string]string{testRoleLabel: "publishable"}, + }}, + }, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + peer := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pub-peer", Namespace: "default", + Labels: map[string]string{testRoleLabel: "publishable"}, + }, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + ExternalAddress: "pub-peer-p2p.test-1.example.com:26656", + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + + r, _ := newNodeReconciler(t, node, peer) + if err := r.reconcilePeers(context.Background(), node); err != nil { + t.Fatalf("reconcilePeers: %v", err) + } + if len(node.Status.ResolvedPeers) != 1 { + t.Fatalf("expected 1 peer, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) + } + want := "mock-node-id@pub-peer-p2p.test-1.example.com:26656" + if node.Status.ResolvedPeers[0] != want { + t.Errorf("resolvedPeers[0] = %q, want %q", node.Status.ResolvedPeers[0], want) + } +} + func TestReconcilePeers_ExcludesSelf(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{ @@ -94,7 +136,7 @@ func TestReconcilePeers_ExcludesSelf(t *testing.T) { if len(node.Status.ResolvedPeers) != 1 { t.Fatalf("expected 1 resolved peer (self excluded), got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) } - if node.Status.ResolvedPeers[0] != "other-node-0.other-node.default.svc.cluster.local" { + if node.Status.ResolvedPeers[0] != "mock-node-id@other-node-0.other-node.default.svc.cluster.local:26656" { t.Errorf("resolvedPeers[0] = %q", node.Status.ResolvedPeers[0]) } } @@ -107,7 +149,7 @@ func TestReconcilePeers_CrossNamespace_DoesNotExcludeMatchingName(t *testing.T) Image: "sei:latest", Peers: []seiv1alpha1.PeerSource{ {Label: &seiv1alpha1.LabelPeerSource{ - Selector: map[string]string{"role": "peer"}, + Selector: map[string]string{testRoleLabel: "peer"}, Namespace: "ns-b", }}, }, @@ -118,7 +160,7 @@ func TestReconcilePeers_CrossNamespace_DoesNotExcludeMatchingName(t *testing.T) peerSameName := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{ Name: "shared-name", Namespace: "ns-b", - Labels: map[string]string{"role": "peer"}, + Labels: map[string]string{testRoleLabel: "peer"}, }, Spec: seiv1alpha1.SeiNodeSpec{ChainID: "test-1", Image: "sei:latest", FullNode: &seiv1alpha1.FullNodeSpec{}}, } @@ -149,7 +191,7 @@ func TestReconcilePeers_NoPatchWhenUnchanged(t *testing.T) { FullNode: &seiv1alpha1.FullNodeSpec{}, }, Status: seiv1alpha1.SeiNodeStatus{ - ResolvedPeers: []string{"peer-1-0.peer-1.default.svc.cluster.local"}, + ResolvedPeers: []string{"mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656"}, }, } peer := &seiv1alpha1.SeiNode{ diff --git a/internal/controller/node/plan_execution_test.go b/internal/controller/node/plan_execution_test.go index 183e8ead..5d00e24d 100644 --- a/internal/controller/node/plan_execution_test.go +++ b/internal/controller/node/plan_execution_test.go @@ -53,6 +53,9 @@ type mockSidecarClient struct { // Healthz returns (healthz, healthzErr) when non-nil; defaults to (true, nil). healthz *bool healthzErr error + + nodeID string + nodeIDErr error } func (m *mockSidecarClient) SubmitTask(_ context.Context, req sidecar.TaskRequest) (uuid.UUID, error) { @@ -86,6 +89,10 @@ func (m *mockSidecarClient) Healthz(_ context.Context) (bool, error) { return true, m.healthzErr } +func (m *mockSidecarClient) GetNodeID(_ context.Context) (string, error) { + return m.nodeID, m.nodeIDErr +} + func strPtr(s string) *string { return &s } func completedResult(id uuid.UUID, taskType string, taskErr *string) *sidecar.TaskResult { diff --git a/internal/controller/node/reconciler_test.go b/internal/controller/node/reconciler_test.go index 20c1337c..6fb5d5b6 100644 --- a/internal/controller/node/reconciler_test.go +++ b/internal/controller/node/reconciler_test.go @@ -43,13 +43,15 @@ func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, WithObjects(objs...). WithStatusSubresource(&seiv1alpha1.SeiNode{}). Build() - mock := &mockSidecarClient{} + mock := &mockSidecarClient{nodeID: "mock-node-id"} r := &SeiNodeReconciler{ Client: c, Scheme: s, Recorder: record.NewFakeRecorder(100), Platform: platformtest.Config(), - Planner: &planner.NodeResolver{}, + Planner: &planner.NodeResolver{ + BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) { return mock, nil }, + }, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{ ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/controller/nodedeployment/envtest/stubs.go b/internal/controller/nodedeployment/envtest/stubs.go index 3e8e166d..f31c13bf 100644 --- a/internal/controller/nodedeployment/envtest/stubs.go +++ b/internal/controller/nodedeployment/envtest/stubs.go @@ -118,6 +118,12 @@ func (s *StubSidecarClient) Healthz(_ context.Context) (bool, error) { return true, nil } +// GetNodeID returns a deterministic stub identity. Real CometBFT node IDs +// are 40-char hex; this satisfies callers that only need a non-empty value. +func (s *StubSidecarClient) GetNodeID(_ context.Context) (string, error) { + return "stub-node-id", nil +} + // SubmittedCount is a debugging hook for tests — reports how many // distinct tasks the stub has accepted. Not used by the production // reconcile path; if it ever is, the stub interface drifted. diff --git a/internal/controller/nodetask/controller_test.go b/internal/controller/nodetask/controller_test.go index bd49abdb..a1cf81b0 100644 --- a/internal/controller/nodetask/controller_test.go +++ b/internal/controller/nodetask/controller_test.go @@ -249,7 +249,8 @@ func (f *fakeSidecarClient) GetTask(_ context.Context, id uuid.UUID) (*sidecar.T return nil, sidecar.ErrNotFound } -func (f *fakeSidecarClient) Healthz(_ context.Context) (bool, error) { return true, nil } +func (f *fakeSidecarClient) Healthz(_ context.Context) (bool, error) { return true, nil } +func (f *fakeSidecarClient) GetNodeID(_ context.Context) (string, error) { return "", nil } func (f *fakeSidecarClient) setResult(id uuid.UUID, status sidecar.TaskResultStatus, errMsg string) { f.mu.Lock() diff --git a/internal/planner/executor_test.go b/internal/planner/executor_test.go index b6cc0745..6c4585b3 100644 --- a/internal/planner/executor_test.go +++ b/internal/planner/executor_test.go @@ -66,6 +66,10 @@ func (m *mockSidecarClient) Healthz(_ context.Context) (bool, error) { return true, nil } +func (m *mockSidecarClient) GetNodeID(_ context.Context) (string, error) { + return "", nil +} + func testScheme(t *testing.T) *k8sruntime.Scheme { t.Helper() s := k8sruntime.NewScheme() diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 0b314418..a6ccd6ec 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -665,9 +665,12 @@ func discoverPeersTask(node *seiv1alpha1.SeiNode) sidecar.DiscoverPeersTask { Addresses: s.Static.Addresses, }) case s.Label != nil: + // Label peers are pre-composed (@:) by + // the controller's reconcilePeers; route to the static + // source so the sidecar writes them verbatim. sources = append(sources, sidecar.PeerSource{ - Type: sidecar.PeerSourceDNSEndpoints, - Endpoints: node.Status.ResolvedPeers, + Type: sidecar.PeerSourceStatic, + Addresses: node.Status.ResolvedPeers, }) } } diff --git a/internal/planner/sidecar_probe_test.go b/internal/planner/sidecar_probe_test.go index e8d9708c..d76b0f99 100644 --- a/internal/planner/sidecar_probe_test.go +++ b/internal/planner/sidecar_probe_test.go @@ -26,7 +26,8 @@ func (f *fakeSidecarClient) SubmitTask(context.Context, sidecar.TaskRequest) (uu func (f *fakeSidecarClient) GetTask(context.Context, uuid.UUID) (*sidecar.TaskResult, error) { return nil, sidecar.ErrNotFound } -func (f *fakeSidecarClient) Healthz(context.Context) (bool, error) { return f.healthy, f.err } +func (f *fakeSidecarClient) Healthz(context.Context) (bool, error) { return f.healthy, f.err } +func (f *fakeSidecarClient) GetNodeID(context.Context) (string, error) { return "", nil } func findSidecarReady(node *seiv1alpha1.SeiNode) *metav1.Condition { return apimeta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarReady) diff --git a/internal/task/task.go b/internal/task/task.go index 823daac7..cf9e3e18 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -137,13 +137,14 @@ func DeterministicTaskID(planID, taskType string, planIndex int) string { return uuid.NewSHA1(taskIDNamespace, []byte(seed)).String() } -// SidecarClient abstracts the sidecar HTTP API for task submission and -// status polling. Narrowed from the full seictl client to the two methods -// needed by task execution. Implementations must be safe for concurrent use. +// SidecarClient abstracts the sidecar HTTP API for the operations the +// controller invokes per-peer: task submission/polling, health, and +// peer-identity lookup. Safe for concurrent use. type SidecarClient interface { SubmitTask(ctx context.Context, req sidecar.TaskRequest) (uuid.UUID, error) GetTask(ctx context.Context, id uuid.UUID) (*sidecar.TaskResult, error) Healthz(ctx context.Context) (bool, error) + GetNodeID(ctx context.Context) (string, error) } // ExecutionConfig bundles all dependencies needed by task executions: diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 8b2f86e2..761b419a 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -957,9 +957,10 @@ spec: type: object resolvedPeers: description: |- - ResolvedPeers is the current set of peer DNS hostnames discovered - from label-based peer sources. Reconciled continuously so that - future peer-update plans can detect drift. + ResolvedPeers is the current set of `@:` peer + strings discovered from label-based peer sources, ready to write + into CometBFT's persistent_peers verbatim. Host is the peer's + Spec.ExternalAddress when set, otherwise the headless Service DNS. items: type: string type: array From 0fceb7aa3bef250c61a0ae6483e83050169a55e6 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 14:14:33 -0700 Subject: [PATCH 2/7] fix(peers): per-peer skip + prior-entry preservation on transient sidecar failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per platform-engineer's HOLD on PR #369: atomic resolve semantics wedge fleet-wide reconciles when any one label-matched peer's sidecar is transient-unavailable. In pacific-1 (6+ SNDs with mutual label selectors), a single archive sidecar restart would block every consumer's reconcile loop — not just peer updates, all of it, because reconcilePeers runs before the status patch. Fix: - On per-peer BuildSidecarClient or GetNodeID error, preserve that peer's prior entry from Status.ResolvedPeers if available. Indexed by host:port to look up the existing @host:port. - If no prior entry exists (peer never resolved), skip with a structured log line, do not fail the reconcile. - The genesis CollectAndSetPeers path keeps its atomic semantics — that's a one-shot bounded-cohort operation where retry is correct. - New tests: TestReconcilePeers_PreservesPriorEntryOnTransientFailure, TestReconcilePeers_SkipsNewPeerOnSidecarFailure. Addresses the platform-engineer HOLD reason on PR #369. K8s-specialist + sei-network-specialist already GO'd the atomic version; the "silent partial list" concern they raised is mitigated by the structured log line on every skip. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/node/peers.go | 54 ++++++++--- internal/controller/node/peers_test.go | 99 ++++++++++++++++++++- internal/controller/node/reconciler_test.go | 6 +- 3 files changed, 143 insertions(+), 16 deletions(-) diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index ea3d6877..83d59dc1 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -4,9 +4,11 @@ import ( "context" "fmt" "slices" + "strings" seiconfig "github.com/sei-protocol/sei-config" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) @@ -34,15 +36,21 @@ func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha } // resolveLabelPeers lists SeiNodes matching the selector and returns -// fully-composed `@:` strings. The host is each -// peer's Spec.ExternalAddress when set, otherwise the headless Service -// DNS. node_id is fetched from each peer's sidecar via gRPC — the same -// in-cluster path the genesis CollectAndSetPeers task uses. +// fully-composed `@:` strings. Host is each peer's +// Spec.ExternalAddress when set, otherwise the headless Service DNS. +// node_id is fetched from each peer's sidecar via gRPC. +// +// Per-peer transient failures (sidecar restarting, gRPC timeout) do +// not fail the whole resolve. The peer's prior entry from +// Status.ResolvedPeers is preserved if available; otherwise the peer +// is skipped for this cycle with a structured log line. This keeps +// fleet-wide reconciles from wedging on a single peer's sidecar churn. func (r *SeiNodeReconciler) resolveLabelPeers( ctx context.Context, node *seiv1alpha1.SeiNode, src *seiv1alpha1.LabelPeerSource, ) ([]string, error) { + logger := log.FromContext(ctx) ns := node.Namespace if src.Namespace != "" { ns = src.Namespace @@ -56,6 +64,7 @@ func (r *SeiNodeReconciler) resolveLabelPeers( return nil, fmt.Errorf("listing peers by label: %w", err) } + prior := indexResolvedPeersByHost(node.Status.ResolvedPeers) var endpoints []string for i := range nodeList.Items { peer := &nodeList.Items[i] @@ -63,20 +72,43 @@ func (r *SeiNodeReconciler) resolveLabelPeers( continue } + address := peerAddress(peer) sc, err := r.Planner.BuildSidecarClient(peer) - if err != nil { - return nil, fmt.Errorf("building sidecar client for peer %s: %w", peer.Name, err) + if err == nil { + var nodeID string + nodeID, err = sc.GetNodeID(ctx) + if err == nil { + endpoints = append(endpoints, fmt.Sprintf("%s@%s", nodeID, address)) + continue + } } - nodeID, err := sc.GetNodeID(ctx) - if err != nil { - return nil, fmt.Errorf("fetching node_id for peer %s: %w", peer.Name, err) + // Per-peer failure: preserve the prior entry if we have one, + // otherwise skip until the peer's sidecar is reachable. + if existing, ok := prior[address]; ok { + logger.Info("preserving prior peer entry; node_id fetch failed", "peer", peer.Name, "err", err) + endpoints = append(endpoints, existing) + continue } - - endpoints = append(endpoints, fmt.Sprintf("%s@%s", nodeID, peerAddress(peer))) + logger.Info("skipping peer until node_id is resolvable", "peer", peer.Name, "err", err) } return endpoints, nil } +// indexResolvedPeersByHost maps the host:port portion of each composed +// peer entry back to the full string, so a transient sidecar failure +// can be papered over by reusing the prior entry. +func indexResolvedPeersByHost(peers []string) map[string]string { + out := make(map[string]string, len(peers)) + for _, p := range peers { + at := strings.Index(p, "@") + if at <= 0 || at == len(p)-1 { + continue + } + out[p[at+1:]] = p + } + return out +} + // peerAddress is the dial address for a peer: Spec.ExternalAddress (already // host:port from the SND publishable path) when set, otherwise the headless // Service DNS at the standard P2P port. diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index 81620161..8b148ac1 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -9,7 +9,18 @@ import ( seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) -const testRoleLabel = "role" +const ( + testRoleLabel = "role" + testRoleValue = "validator" + testConsumerName = "consumer" + testPeer1ResolvedID = "mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656" +) + +// errStub is a minimal error type for test scaffolding (avoids dragging +// errors.New into a metadata import position). +type errStub string + +func (e errStub) Error() string { return string(e) } func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { node := &seiv1alpha1.SeiNode{ @@ -51,7 +62,7 @@ func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { t.Fatalf("expected 2 resolved peers, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) } want := []string{ - "mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656", + testPeer1ResolvedID, "mock-node-id@peer-2-0.peer-2.default.svc.cluster.local:26656", } for i, w := range want { @@ -63,7 +74,7 @@ func TestReconcilePeers_ResolvesLabelSource(t *testing.T) { func TestReconcilePeers_PrefersExternalAddress(t *testing.T) { node := &seiv1alpha1.SeiNode{ - ObjectMeta: metav1.ObjectMeta{Name: "consumer", Namespace: "default"}, + ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, Spec: seiv1alpha1.SeiNodeSpec{ ChainID: "test-1", Image: "sei:latest", @@ -191,7 +202,7 @@ func TestReconcilePeers_NoPatchWhenUnchanged(t *testing.T) { FullNode: &seiv1alpha1.FullNodeSpec{}, }, Status: seiv1alpha1.SeiNodeStatus{ - ResolvedPeers: []string{"mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656"}, + ResolvedPeers: []string{testPeer1ResolvedID}, }, } peer := &seiv1alpha1.SeiNode{ @@ -231,6 +242,86 @@ func TestReconcilePeers_NoLabelSources_NoPatch(t *testing.T) { // No label sources means no resolved peers, no patch — just verifying no error } +// Per-peer transient failure: prior entry is preserved so a single +// peer's sidecar churn doesn't drop it from ResolvedPeers. +func TestReconcilePeers_PreservesPriorEntryOnTransientFailure(t *testing.T) { + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + Peers: []seiv1alpha1.PeerSource{ + {Label: &seiv1alpha1.LabelPeerSource{ + Selector: map[string]string{testRoleLabel: testRoleValue}, + }}, + }, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + Status: seiv1alpha1.SeiNodeStatus{ + ResolvedPeers: []string{ + testPeer1ResolvedID, + }, + }, + } + peer := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "peer-1", Namespace: "default", + Labels: map[string]string{testRoleLabel: testRoleValue}, + }, + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "test-1", Image: "sei:latest", FullNode: &seiv1alpha1.FullNodeSpec{}}, + } + + // Sidecar returns an error — simulates a peer mid-restart. + mock := &mockSidecarClient{nodeIDErr: errStub("sidecar unreachable")} + r, _ := newNodeReconcilerWithSidecar(t, mock, node, peer) + + if err := r.reconcilePeers(context.Background(), node); err != nil { + t.Fatalf("reconcilePeers errored on transient peer failure: %v", err) + } + if len(node.Status.ResolvedPeers) != 1 { + t.Fatalf("expected prior entry preserved, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) + } + want := testPeer1ResolvedID + if node.Status.ResolvedPeers[0] != want { + t.Errorf("resolvedPeers[0] = %q, want preserved %q", node.Status.ResolvedPeers[0], want) + } +} + +// New peer with no prior entry + sidecar failure: skip the peer this +// cycle instead of wedging the whole reconcile. +func TestReconcilePeers_SkipsNewPeerOnSidecarFailure(t *testing.T) { + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + Peers: []seiv1alpha1.PeerSource{ + {Label: &seiv1alpha1.LabelPeerSource{ + Selector: map[string]string{testRoleLabel: testRoleValue}, + }}, + }, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + peer := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "peer-1", Namespace: "default", + Labels: map[string]string{testRoleLabel: testRoleValue}, + }, + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "test-1", Image: "sei:latest", FullNode: &seiv1alpha1.FullNodeSpec{}}, + } + + mock := &mockSidecarClient{nodeIDErr: errStub("sidecar unreachable")} + r, _ := newNodeReconcilerWithSidecar(t, mock, node, peer) + + if err := r.reconcilePeers(context.Background(), node); err != nil { + t.Fatalf("reconcilePeers errored on new-peer sidecar failure: %v", err) + } + if len(node.Status.ResolvedPeers) != 0 { + t.Fatalf("expected new unresolvable peer to be skipped, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) + } +} + func TestReconcilePeers_DeduplicatesOverlappingSources(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "my-node", Namespace: "default"}, diff --git a/internal/controller/node/reconciler_test.go b/internal/controller/node/reconciler_test.go index 6fb5d5b6..93d232c6 100644 --- a/internal/controller/node/reconciler_test.go +++ b/internal/controller/node/reconciler_test.go @@ -36,6 +36,11 @@ func newNodeTestScheme(t *testing.T) *k8sruntime.Scheme { } func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, client.Client) { + t.Helper() + return newNodeReconcilerWithSidecar(t, &mockSidecarClient{nodeID: "mock-node-id"}, objs...) +} + +func newNodeReconcilerWithSidecar(t *testing.T, mock *mockSidecarClient, objs ...client.Object) (*SeiNodeReconciler, client.Client) { t.Helper() s := newNodeTestScheme(t) c := fake.NewClientBuilder(). @@ -43,7 +48,6 @@ func newNodeReconciler(t *testing.T, objs ...client.Object) (*SeiNodeReconciler, WithObjects(objs...). WithStatusSubresource(&seiv1alpha1.SeiNode{}). Build() - mock := &mockSidecarClient{nodeID: "mock-node-id"} r := &SeiNodeReconciler{ Client: c, Scheme: s, From 10a8cbfbd80dbfdb895f433d43122add91870709 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 14:21:30 -0700 Subject: [PATCH 3/7] =?UTF-8?q?chore(peers):=20comment=20sweep=20=E2=80=94?= =?UTF-8?q?=20drop=20narration,=20lift=20WHY=20to=20PR=20body?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 5-9 lines → 2-4 across resolveLabelPeers, indexResolvedPeersByHost, peerAddress, the planner-side Label→Static branch, the SidecarClient interface, Status.ResolvedPeers godoc, and the envtest stub. No behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 6 ++--- internal/controller/node/peers.go | 26 ++++++------------- internal/controller/node/peers_test.go | 8 ++---- .../nodedeployment/envtest/stubs.go | 4 +-- internal/planner/planner.go | 5 ++-- internal/task/task.go | 5 ++-- 6 files changed, 18 insertions(+), 36 deletions(-) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 85fd0b17..d82f0cbc 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -347,10 +347,8 @@ type SeiNodeStatus struct { // +optional Plan *TaskPlan `json:"plan,omitempty"` - // ResolvedPeers is the current set of `@:` peer - // strings discovered from label-based peer sources, ready to write - // into CometBFT's persistent_peers verbatim. Host is the peer's - // Spec.ExternalAddress when set, otherwise the headless Service DNS. + // ResolvedPeers carries `@:` entries resolved + // from label-based peer sources, ready for CometBFT's persistent_peers. // +optional ResolvedPeers []string `json:"resolvedPeers,omitempty"` diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 83d59dc1..19f2a9f2 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -35,16 +35,10 @@ func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha return nil } -// resolveLabelPeers lists SeiNodes matching the selector and returns -// fully-composed `@:` strings. Host is each peer's -// Spec.ExternalAddress when set, otherwise the headless Service DNS. -// node_id is fetched from each peer's sidecar via gRPC. -// -// Per-peer transient failures (sidecar restarting, gRPC timeout) do -// not fail the whole resolve. The peer's prior entry from -// Status.ResolvedPeers is preserved if available; otherwise the peer -// is skipped for this cycle with a structured log line. This keeps -// fleet-wide reconciles from wedging on a single peer's sidecar churn. +// resolveLabelPeers returns fully-composed `@:` +// strings for SeiNodes matching the selector. Per-peer sidecar failures +// preserve the prior entry from Status.ResolvedPeers (so transients +// don't wedge fleet-wide reconciles) or skip with a log line. func (r *SeiNodeReconciler) resolveLabelPeers( ctx context.Context, node *seiv1alpha1.SeiNode, @@ -82,8 +76,6 @@ func (r *SeiNodeReconciler) resolveLabelPeers( continue } } - // Per-peer failure: preserve the prior entry if we have one, - // otherwise skip until the peer's sidecar is reachable. if existing, ok := prior[address]; ok { logger.Info("preserving prior peer entry; node_id fetch failed", "peer", peer.Name, "err", err) endpoints = append(endpoints, existing) @@ -94,9 +86,8 @@ func (r *SeiNodeReconciler) resolveLabelPeers( return endpoints, nil } -// indexResolvedPeersByHost maps the host:port portion of each composed -// peer entry back to the full string, so a transient sidecar failure -// can be papered over by reusing the prior entry. +// indexResolvedPeersByHost maps `host:port` → `@host:port` for +// O(1) lookup of the prior composed entry on transient failure. func indexResolvedPeersByHost(peers []string) map[string]string { out := make(map[string]string, len(peers)) for _, p := range peers { @@ -109,9 +100,8 @@ func indexResolvedPeersByHost(peers []string) map[string]string { return out } -// peerAddress is the dial address for a peer: Spec.ExternalAddress (already -// host:port from the SND publishable path) when set, otherwise the headless -// Service DNS at the standard P2P port. +// peerAddress returns Spec.ExternalAddress (already host:port) when set, +// otherwise the headless Service DNS at the standard P2P port. func peerAddress(peer *seiv1alpha1.SeiNode) string { if peer.Spec.ExternalAddress != "" { return peer.Spec.ExternalAddress diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index 8b148ac1..aef7eac3 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -16,8 +16,6 @@ const ( testPeer1ResolvedID = "mock-node-id@peer-1-0.peer-1.default.svc.cluster.local:26656" ) -// errStub is a minimal error type for test scaffolding (avoids dragging -// errors.New into a metadata import position). type errStub string func (e errStub) Error() string { return string(e) } @@ -242,8 +240,7 @@ func TestReconcilePeers_NoLabelSources_NoPatch(t *testing.T) { // No label sources means no resolved peers, no patch — just verifying no error } -// Per-peer transient failure: prior entry is preserved so a single -// peer's sidecar churn doesn't drop it from ResolvedPeers. +// Transient sidecar failure: prior entry is preserved. func TestReconcilePeers_PreservesPriorEntryOnTransientFailure(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, @@ -287,8 +284,7 @@ func TestReconcilePeers_PreservesPriorEntryOnTransientFailure(t *testing.T) { } } -// New peer with no prior entry + sidecar failure: skip the peer this -// cycle instead of wedging the whole reconcile. +// New peer with no prior entry + sidecar failure: skip, don't wedge. func TestReconcilePeers_SkipsNewPeerOnSidecarFailure(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, diff --git a/internal/controller/nodedeployment/envtest/stubs.go b/internal/controller/nodedeployment/envtest/stubs.go index f31c13bf..7be0a68a 100644 --- a/internal/controller/nodedeployment/envtest/stubs.go +++ b/internal/controller/nodedeployment/envtest/stubs.go @@ -118,8 +118,8 @@ func (s *StubSidecarClient) Healthz(_ context.Context) (bool, error) { return true, nil } -// GetNodeID returns a deterministic stub identity. Real CometBFT node IDs -// are 40-char hex; this satisfies callers that only need a non-empty value. +// GetNodeID returns a deterministic stub identity (non-empty is all +// callers check for in envtest). func (s *StubSidecarClient) GetNodeID(_ context.Context) (string, error) { return "stub-node-id", nil } diff --git a/internal/planner/planner.go b/internal/planner/planner.go index a6ccd6ec..1b62a929 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -665,9 +665,8 @@ func discoverPeersTask(node *seiv1alpha1.SeiNode) sidecar.DiscoverPeersTask { Addresses: s.Static.Addresses, }) case s.Label != nil: - // Label peers are pre-composed (@:) by - // the controller's reconcilePeers; route to the static - // source so the sidecar writes them verbatim. + // ResolvedPeers is pre-composed `@:`; + // route as static so the sidecar writes them verbatim. sources = append(sources, sidecar.PeerSource{ Type: sidecar.PeerSourceStatic, Addresses: node.Status.ResolvedPeers, diff --git a/internal/task/task.go b/internal/task/task.go index cf9e3e18..5705f7d3 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -137,9 +137,8 @@ func DeterministicTaskID(planID, taskType string, planIndex int) string { return uuid.NewSHA1(taskIDNamespace, []byte(seed)).String() } -// SidecarClient abstracts the sidecar HTTP API for the operations the -// controller invokes per-peer: task submission/polling, health, and -// peer-identity lookup. Safe for concurrent use. +// SidecarClient abstracts the sidecar HTTP API: task submit/poll, health, +// and node_id lookup. Safe for concurrent use. type SidecarClient interface { SubmitTask(ctx context.Context, req sidecar.TaskRequest) (uuid.UUID, error) GetTask(ctx context.Context, id uuid.UUID) (*sidecar.TaskResult, error) From 1c518f82395e4be1a7742e9953797f24555d7a80 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 15:06:25 -0700 Subject: [PATCH 4/7] fix(peers): nil-guard BuildSidecarClient to honor planner's documented contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NodeResolver.BuildSidecarClient is documented as nilable ("Nil factory skips the sidecar probe; used by tests") and ResolvePlan nil-guards it. resolveLabelPeers was calling it unconditionally — a panic waiting on a test or future refactor that leaves the factory unset. Treat nil as a transient per-peer failure (matches existing fallback semantics: preserve prior entry or skip with a log line). Caught by Cursor Bugbot review. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/node/peers.go | 17 ++++++++++++- internal/controller/node/peers_test.go | 35 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 19f2a9f2..935f5feb 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -2,6 +2,7 @@ package node import ( "context" + "errors" "fmt" "slices" "strings" @@ -11,8 +12,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/task" ) +// errNoSidecarFactory matches the planner's documented "nil factory" contract +// (see planner.NodeResolver). resolveLabelPeers treats it as a transient +// per-peer failure rather than panicking. +var errNoSidecarFactory = errors.New("sidecar client factory is nil") + func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha1.SeiNode) error { var resolved []string for _, src := range node.Spec.Peers { @@ -67,7 +74,15 @@ func (r *SeiNodeReconciler) resolveLabelPeers( } address := peerAddress(peer) - sc, err := r.Planner.BuildSidecarClient(peer) + var ( + sc task.SidecarClient + err error + ) + if r.Planner.BuildSidecarClient == nil { + err = errNoSidecarFactory + } else { + sc, err = r.Planner.BuildSidecarClient(peer) + } if err == nil { var nodeID string nodeID, err = sc.GetNodeID(ctx) diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index aef7eac3..1dbc2a4f 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -318,6 +318,41 @@ func TestReconcilePeers_SkipsNewPeerOnSidecarFailure(t *testing.T) { } } +// Nil BuildSidecarClient factory (planner contract: "Nil factory skips +// the sidecar probe; used by tests") must not panic — treat as transient. +func TestReconcilePeers_NilSidecarFactoryDoesNotPanic(t *testing.T) { + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + Peers: []seiv1alpha1.PeerSource{ + {Label: &seiv1alpha1.LabelPeerSource{ + Selector: map[string]string{testRoleLabel: testRoleValue}, + }}, + }, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + peer := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "peer-1", Namespace: "default", + Labels: map[string]string{testRoleLabel: testRoleValue}, + }, + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "test-1", Image: "sei:latest", FullNode: &seiv1alpha1.FullNodeSpec{}}, + } + + r, _ := newNodeReconciler(t, node, peer) + r.Planner.BuildSidecarClient = nil + + if err := r.reconcilePeers(context.Background(), node); err != nil { + t.Fatalf("reconcilePeers errored on nil factory: %v", err) + } + if len(node.Status.ResolvedPeers) != 0 { + t.Fatalf("expected unresolvable peer to be skipped, got %d: %v", len(node.Status.ResolvedPeers), node.Status.ResolvedPeers) + } +} + func TestReconcilePeers_DeduplicatesOverlappingSources(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "my-node", Namespace: "default"}, From c136aa64a76c12147c979fd88804197db7c86785 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 15:10:27 -0700 Subject: [PATCH 5/7] test(peers): cover nil-factory preserve-prior path + drop var-block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K8s-specialist cross-review on the nil-guard fix asked for the missing test variant: nil factory + prior entry present must route to preserve-prior, not skip. Locks errNoSidecarFactory into the same recovery path as runtime transient sidecar errors. Also: drop the var ( ) block at the call site for a single var + sentinel init — closer to the boring-clear-code style used elsewhere in the file. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/node/peers.go | 10 ++---- internal/controller/node/peers_test.go | 42 ++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 935f5feb..21eb6fe8 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -74,13 +74,9 @@ func (r *SeiNodeReconciler) resolveLabelPeers( } address := peerAddress(peer) - var ( - sc task.SidecarClient - err error - ) - if r.Planner.BuildSidecarClient == nil { - err = errNoSidecarFactory - } else { + var sc task.SidecarClient + err := errNoSidecarFactory + if r.Planner.BuildSidecarClient != nil { sc, err = r.Planner.BuildSidecarClient(peer) } if err == nil { diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index 1dbc2a4f..c264eac9 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -319,8 +319,8 @@ func TestReconcilePeers_SkipsNewPeerOnSidecarFailure(t *testing.T) { } // Nil BuildSidecarClient factory (planner contract: "Nil factory skips -// the sidecar probe; used by tests") must not panic — treat as transient. -func TestReconcilePeers_NilSidecarFactoryDoesNotPanic(t *testing.T) { +// the sidecar probe; used by tests") routes to skip — no panic, no entry. +func TestReconcilePeers_NilSidecarFactorySkipsNewPeer(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, Spec: seiv1alpha1.SeiNodeSpec{ @@ -353,6 +353,44 @@ func TestReconcilePeers_NilSidecarFactoryDoesNotPanic(t *testing.T) { } } +// Nil factory with a prior entry: the preserve-prior branch fires (same +// recovery path as a runtime-transient sidecar error). +func TestReconcilePeers_NilSidecarFactoryPreservesPriorEntry(t *testing.T) { + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "test-1", + Image: "sei:latest", + Peers: []seiv1alpha1.PeerSource{ + {Label: &seiv1alpha1.LabelPeerSource{ + Selector: map[string]string{testRoleLabel: testRoleValue}, + }}, + }, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + Status: seiv1alpha1.SeiNodeStatus{ + ResolvedPeers: []string{testPeer1ResolvedID}, + }, + } + peer := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "peer-1", Namespace: "default", + Labels: map[string]string{testRoleLabel: testRoleValue}, + }, + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "test-1", Image: "sei:latest", FullNode: &seiv1alpha1.FullNodeSpec{}}, + } + + r, _ := newNodeReconciler(t, node, peer) + r.Planner.BuildSidecarClient = nil + + if err := r.reconcilePeers(context.Background(), node); err != nil { + t.Fatalf("reconcilePeers errored on nil factory: %v", err) + } + if len(node.Status.ResolvedPeers) != 1 || node.Status.ResolvedPeers[0] != testPeer1ResolvedID { + t.Fatalf("expected prior entry preserved, got %v", node.Status.ResolvedPeers) + } +} + func TestReconcilePeers_DeduplicatesOverlappingSources(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "my-node", Namespace: "default"}, From babb8e015cf254930f8735d2ce9a075eea557c29 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 15:11:24 -0700 Subject: [PATCH 6/7] chore(peers): final comment sweep on nil-guard follow-up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit errNoSidecarFactory doc 3 lines → 2; new test docstrings shortened to match the file's existing one-line convention. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/node/peers.go | 5 ++--- internal/controller/node/peers_test.go | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/internal/controller/node/peers.go b/internal/controller/node/peers.go index 21eb6fe8..6839a6d2 100644 --- a/internal/controller/node/peers.go +++ b/internal/controller/node/peers.go @@ -15,9 +15,8 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/task" ) -// errNoSidecarFactory matches the planner's documented "nil factory" contract -// (see planner.NodeResolver). resolveLabelPeers treats it as a transient -// per-peer failure rather than panicking. +// errNoSidecarFactory honors planner.NodeResolver's nilable-factory +// contract: resolveLabelPeers treats nil as a transient peer failure. var errNoSidecarFactory = errors.New("sidecar client factory is nil") func (r *SeiNodeReconciler) reconcilePeers(ctx context.Context, node *seiv1alpha1.SeiNode) error { diff --git a/internal/controller/node/peers_test.go b/internal/controller/node/peers_test.go index c264eac9..4657c0fa 100644 --- a/internal/controller/node/peers_test.go +++ b/internal/controller/node/peers_test.go @@ -318,8 +318,7 @@ func TestReconcilePeers_SkipsNewPeerOnSidecarFailure(t *testing.T) { } } -// Nil BuildSidecarClient factory (planner contract: "Nil factory skips -// the sidecar probe; used by tests") routes to skip — no panic, no entry. +// Nil factory + new peer: skip without panic. func TestReconcilePeers_NilSidecarFactorySkipsNewPeer(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, @@ -353,8 +352,7 @@ func TestReconcilePeers_NilSidecarFactorySkipsNewPeer(t *testing.T) { } } -// Nil factory with a prior entry: the preserve-prior branch fires (same -// recovery path as a runtime-transient sidecar error). +// Nil factory + prior entry: preserve-prior branch fires. func TestReconcilePeers_NilSidecarFactoryPreservesPriorEntry(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: testConsumerName, Namespace: "default"}, From 3b71ffc78893a2ab3edbcdb4f9626f2fd7acc48e Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 15:20:02 -0700 Subject: [PATCH 7/7] chore(manifests): regenerate CRDs for ResolvedPeers godoc trim Earlier comment sweep on api/v1alpha1/seinode_types.go didn't carry into the generated CRD YAMLs; verify-generated caught the drift. Co-Authored-By: Claude Opus 4.7 (1M context) --- config/crd/sei.io_seinodes.yaml | 6 ++---- manifests/sei.io_seinodes.yaml | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 761b419a..12810225 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -957,10 +957,8 @@ spec: type: object resolvedPeers: description: |- - ResolvedPeers is the current set of `@:` peer - strings discovered from label-based peer sources, ready to write - into CometBFT's persistent_peers verbatim. Host is the peer's - Spec.ExternalAddress when set, otherwise the headless Service DNS. + ResolvedPeers carries `@:` entries resolved + from label-based peer sources, ready for CometBFT's persistent_peers. items: type: string type: array diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 761b419a..12810225 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -957,10 +957,8 @@ spec: type: object resolvedPeers: description: |- - ResolvedPeers is the current set of `@:` peer - strings discovered from label-based peer sources, ready to write - into CometBFT's persistent_peers verbatim. Host is the peer's - Spec.ExternalAddress when set, otherwise the headless Service DNS. + ResolvedPeers carries `@:` entries resolved + from label-based peer sources, ready for CometBFT's persistent_peers. items: type: string type: array