diff --git a/derp/derphttp/derphttp_client.go b/derp/derphttp/derphttp_client.go index b9d175619d65a..8bc5278889288 100644 --- a/derp/derphttp/derphttp_client.go +++ b/derp/derphttp/derphttp_client.go @@ -53,10 +53,42 @@ import ( // has been called). type Client struct { Header http.Header - TLSConfig *tls.Config // optional; nil means default - DNSCache *dnscache.Resolver // optional; nil means no caching - MeshKey string // optional; for trusted clients - IsProber bool // optional; for probers to optional declare themselves as such + TLSConfig *tls.Config // optional; nil means default + + // TLSConfigBypassesTLSDial, if true, causes TLSConfig to be used as-is + // (after a Clone and ServerName housekeeping) instead of being passed + // through tlsdial.Config. The default behavior wraps TLSConfig in + // Tailscale's hosted-DERP verification helper, which installs a + // VerifyConnection hook with a baked-in Let's Encrypt fallback and + // rejects (panics on) base configs that already set InsecureSkipVerify + // or VerifyConnection. + // + // Set this to true when the caller is responsible for server + // verification — e.g. when DERP is fronted by a reverse proxy that + // presents a non-publicly-trusted certificate, when using an mTLS + // framework that performs its own peer verification (custom CAs, + // SPIFFE-style identity), or any case in which the caller has supplied + // VerifyPeerCertificate / VerifyConnection on TLSConfig and does not + // want the bake-in fallback. + // + // SECURITY: when this flag is true, the supplied TLSConfig is the + // SOLE source of server verification. A TLSConfig with + // InsecureSkipVerify=true and no VerifyPeerCertificate / + // VerifyConnection callback will result in a TLS handshake that + // performs no server identity check at all. Callers MUST either rely + // on stock RootCAs + hostname matching (i.e. leave InsecureSkipVerify + // false), or provide a VerifyPeerCertificate / VerifyConnection that + // implements an equivalent check. + // + // When true, TLSConfig must be non-nil; the flag is ignored otherwise. + // Per-DERPNode overrides are still honored: InsecureForTests still + // disables verification (intentionally), but CertName (which is + // implemented by tlsdial) is ignored. + TLSConfigBypassesTLSDial bool + + DNSCache *dnscache.Resolver // optional; nil means no caching + MeshKey string // optional; for trusted clients + IsProber bool // optional; for probers to optional declare themselves as such // Allow forcing WebSocket fallback for situations where proxies do not // play well with `Upgrade: derp`. Turning this on will cause the client to @@ -704,14 +736,34 @@ func (c *Client) DialRegion(ctx context.Context, reg *tailcfg.DERPRegion) (net.C } func (c *Client) tlsConfig(node *tailcfg.DERPNode) *tls.Config { - tlsConf := tlsdial.Config(c.tlsServerName(node), c.TLSConfig) - if node != nil { - if node.InsecureForTests { + var tlsConf *tls.Config + if c.TLSConfigBypassesTLSDial && c.TLSConfig != nil { + // Caller has opted to bring their own server verification (custom + // CAs / mTLS / SPIFFE-style identity / non-publicly-trusted PKI). + // Use the supplied config as-is, only filling in ServerName when + // the caller didn't pin one. node.CertName (a tlsdial-specific + // domain-fronting hook) is intentionally not applied here; callers + // using bypass are expected to encode any cert-pinning in their + // own VerifyPeerCertificate / VerifyConnection. + tlsConf = c.TLSConfig.Clone() + if tlsConf.ServerName == "" { + tlsConf.ServerName = c.tlsServerName(node) + } + if node != nil && node.InsecureForTests { tlsConf.InsecureSkipVerify = true tlsConf.VerifyConnection = nil + tlsConf.VerifyPeerCertificate = nil } - if node.CertName != "" { - tlsdial.SetConfigExpectedCert(tlsConf, node.CertName) + } else { + tlsConf = tlsdial.Config(c.tlsServerName(node), c.TLSConfig) + if node != nil { + if node.InsecureForTests { + tlsConf.InsecureSkipVerify = true + tlsConf.VerifyConnection = nil + } + if node.CertName != "" { + tlsdial.SetConfigExpectedCert(tlsConf, node.CertName) + } } } tlsConf.NextProtos = []string{"http/1.1"} diff --git a/derp/derphttp/derphttp_test.go b/derp/derphttp/derphttp_test.go index 13e8c95599e7d..fc0f522a5924e 100644 --- a/derp/derphttp/derphttp_test.go +++ b/derp/derphttp/derphttp_test.go @@ -8,6 +8,7 @@ import ( "bytes" "context" "crypto/tls" + "crypto/x509" "net" "net/http" "net/http/httptest" @@ -17,6 +18,7 @@ import ( "github.com/coder/websocket" "tailscale.com/derp" + "tailscale.com/tailcfg" "tailscale.com/types/key" ) @@ -324,3 +326,101 @@ func TestForceWebsockets(t *testing.T) { c.Close() } + +func TestTLSConfigBypassesTLSDial(t *testing.T) { + node := &tailcfg.DERPNode{HostName: "derp.example.com"} + + t.Run("default path wraps via tlsdial.Config", func(t *testing.T) { + // tlsdial.Config installs its own VerifyConnection. Confirming it + // runs is the easiest way to assert we did NOT bypass it. We must + // not pass a base config that would cause tlsdial.Config to panic. + c := &Client{TLSConfig: &tls.Config{RootCAs: x509.NewCertPool()}} + got := c.tlsConfig(node) + if got.VerifyConnection == nil { + t.Fatal("expected default path to install tlsdial's VerifyConnection") + } + if !got.InsecureSkipVerify { + t.Fatal("expected default path to set InsecureSkipVerify (tlsdial does its own verify)") + } + if got.ServerName != node.HostName { + t.Fatalf("ServerName: got %q, want %q", got.ServerName, node.HostName) + } + }) + + t.Run("bypass path uses caller config as-is", func(t *testing.T) { + // A typical "I'm bringing my own verifier" config that would make + // tlsdial.Config panic. + var verifyCalls int + base := &tls.Config{ + InsecureSkipVerify: true, + VerifyPeerCertificate: func(_ [][]byte, _ [][]*x509.Certificate) error { + verifyCalls++ + return nil + }, + } + c := &Client{TLSConfig: base, TLSConfigBypassesTLSDial: true} + got := c.tlsConfig(node) + if got == base { + t.Fatal("expected tlsConfig to clone the base config") + } + if got.VerifyConnection != nil { + t.Fatal("bypass path must not install tlsdial's VerifyConnection") + } + if !got.InsecureSkipVerify { + t.Fatal("bypass path must preserve caller's InsecureSkipVerify") + } + if got.VerifyPeerCertificate == nil { + t.Fatal("bypass path must preserve caller's VerifyPeerCertificate") + } + if got.ServerName != node.HostName { + t.Fatalf("ServerName fallback: got %q, want %q", got.ServerName, node.HostName) + } + if want := []string{"http/1.1"}; len(got.NextProtos) != 1 || got.NextProtos[0] != want[0] { + t.Fatalf("NextProtos: got %v, want %v", got.NextProtos, want) + } + }) + + t.Run("bypass path preserves caller-set ServerName", func(t *testing.T) { + base := &tls.Config{ + InsecureSkipVerify: true, + VerifyPeerCertificate: func(_ [][]byte, _ [][]*x509.Certificate) error { + return nil + }, + ServerName: "explicit.example.com", + } + c := &Client{TLSConfig: base, TLSConfigBypassesTLSDial: true} + got := c.tlsConfig(node) + if got.ServerName != "explicit.example.com" { + t.Fatalf("ServerName: got %q, want explicit.example.com", got.ServerName) + } + }) + + t.Run("bypass flag without TLSConfig falls back to default path", func(t *testing.T) { + c := &Client{TLSConfigBypassesTLSDial: true} + got := c.tlsConfig(node) + if got.VerifyConnection == nil { + t.Fatal("expected fallback to tlsdial path when TLSConfig is nil") + } + }) + + t.Run("bypass path honors node.InsecureForTests", func(t *testing.T) { + base := &tls.Config{ + InsecureSkipVerify: true, + VerifyPeerCertificate: func(_ [][]byte, _ [][]*x509.Certificate) error { + return nil + }, + } + c := &Client{TLSConfig: base, TLSConfigBypassesTLSDial: true} + insecureNode := &tailcfg.DERPNode{HostName: "derp.example.com", InsecureForTests: true} + got := c.tlsConfig(insecureNode) + if !got.InsecureSkipVerify { + t.Fatal("expected InsecureForTests to keep InsecureSkipVerify=true") + } + if got.VerifyPeerCertificate != nil { + t.Fatal("expected InsecureForTests to clear VerifyPeerCertificate") + } + if got.VerifyConnection != nil { + t.Fatal("expected InsecureForTests to clear VerifyConnection") + } + }) +} diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go index 0d1c2e2d5a3b4..04c9b83c1f5c2 100644 --- a/net/netcheck/netcheck.go +++ b/net/netcheck/netcheck.go @@ -208,9 +208,24 @@ type Client struct { // probes. GetDERPHeaders func() http.Header + // GetDERPTLSConfig, if non-nil, returns the TLS config and bypass-tlsdial + // setting to use for the next DERP probe. When set, it takes precedence + // over DERPTLSConfig and DERPTLSConfigBypassesTLSDial. This is useful when + // the DERP TLS config can be updated after the Client is constructed. + GetDERPTLSConfig func() (*tls.Config, bool) + // DERPTLSConfig is an optional TLS config for DERP connections. DERPTLSConfig *tls.Config + // DERPTLSConfigBypassesTLSDial mirrors + // derphttp.Client.TLSConfigBypassesTLSDial: when true (and + // DERPTLSConfig is non-nil), the netcheck DERP client uses the + // supplied TLS config as-is instead of wrapping it via tlsdial.Config. + // Use this when DERPTLSConfig performs its own server verification + // (e.g. mTLS frameworks with custom CAs / SPIFFE-style identity), as + // such configs cause tlsdial.Config to panic. + DERPTLSConfigBypassesTLSDial bool + // For tests testEnoughRegions int testCaptivePortalDelay time.Duration @@ -1305,8 +1320,14 @@ func (c *Client) measureHTTPLatency(ctx context.Context, reg *tailcfg.DERPRegion dc := derphttp.NewNetcheckClient(c.logf) dc.Header = derpHeaders - if c.DERPTLSConfig != nil { - dc.TLSConfig = c.DERPTLSConfig + derpTLSConfig := c.DERPTLSConfig + derpTLSConfigBypassesTLSDial := c.DERPTLSConfigBypassesTLSDial + if c.GetDERPTLSConfig != nil { + derpTLSConfig, derpTLSConfigBypassesTLSDial = c.GetDERPTLSConfig() + } + if derpTLSConfig != nil { + dc.TLSConfig = derpTLSConfig + dc.TLSConfigBypassesTLSDial = derpTLSConfigBypassesTLSDial } defer dc.Close() diff --git a/net/netcheck/netcheck_test.go b/net/netcheck/netcheck_test.go index 2bd995af1a7a4..2a872d0112595 100644 --- a/net/netcheck/netcheck_test.go +++ b/net/netcheck/netcheck_test.go @@ -7,6 +7,7 @@ import ( "bytes" "context" "crypto/tls" + "crypto/x509" "fmt" "net" "net/http" @@ -1107,6 +1108,95 @@ func TestProbeHeaders(t *testing.T) { } } +func TestProbeTLSConfigBypass(t *testing.T) { + logf, closeLogf := logger.LogfCloser(t.Logf) + defer closeLogf() + + // Create a DERP server manually, without a STUN server and with a custom + // handler. + derpServer := derp.NewServer(key.NewNode(), logf) + derpHandler := derphttp.Handler(derpServer) + + var called atomic.Bool + httpsrv := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called.Store(true) + if r.URL.Path == "/derp/latency-check" { + w.WriteHeader(http.StatusOK) + return + } + if r.URL.Path == "/derp" { + derpHandler.ServeHTTP(w, r) + return + } + + t.Errorf("unexpected request: %v", r.URL) + w.WriteHeader(http.StatusNotFound) + })) + httpsrv.Config.ErrorLog = logger.StdLogger(logf) + httpsrv.Config.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler)) + httpsrv.StartTLS() + t.Cleanup(func() { + httpsrv.CloseClientConnections() + httpsrv.Close() + derpServer.Close() + }) + + derpMap := &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "derpy", + Nodes: []*tailcfg.DERPNode{ + { + Name: "d1", + RegionID: 1, + HostName: "localhost", + // Don't specify an IP address to avoid ICMP pinging, + // which will bypass the artificial latency. + IPv4: "", + IPv6: "", + STUNPort: -1, + DERPPort: httpsrv.Listener.Addr().(*net.TCPAddr).Port, + InsecureForTests: true, + }, + }, + }, + }, + } + + var getterCalled atomic.Bool + c := &Client{ + Logf: t.Logf, + UDPBindAddr: "127.0.0.1:0", + GetDERPTLSConfig: func() (*tls.Config, bool) { + getterCalled.Store(true) + return &tls.Config{ + // This would panic inside tlsdial.Config if the bypass flag + // returned alongside it were not propagated to derphttp.Client. + InsecureSkipVerify: true, + VerifyPeerCertificate: func(_ [][]byte, _ [][]*x509.Certificate) error { + return nil + }, + }, true + }, + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + _, err := c.GetReport(ctx, derpMap) + if err != nil { + t.Fatal(err) + } + + if !getterCalled.Load() { + t.Error("didn't call GetDERPTLSConfig") + } + if !called.Load() { + t.Error("didn't call test handler") + } +} + func TestNeverPickSTUNOnlyRegionAsPreferredDERP(t *testing.T) { // Create two DERP regions, one with a STUN server only and one with only a // DERP node. Add artificial latency of 300ms to the DERP region, and test diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go index fdea777f6f064..b398e8de189e1 100644 --- a/wgengine/magicsock/derp.go +++ b/wgengine/magicsock/derp.go @@ -347,8 +347,14 @@ func (c *Conn) derpWriteChanOfAddr(addr netip.AddrPort, peer key.NodePublic) cha dc.SetAddressFamilySelector(derpAddrFamSelector{c}) dc.SetForcedWebsocketCallback(c.derpForcedWebsocketFunc) dc.DNSCache = dnscache.Get() - if tlsCfg := c.derpTLSConfig.Load(); tlsCfg != nil { - dc.TLSConfig = tlsCfg + // Read the (TLS config, bypass-tlsdial) pair atomically so we never + // observe a new custom-verifier TLS config paired with a stale + // bypass=false (which would panic in tlsdial.Config) nor a new + // publicly-trusted config paired with a stale bypass=true (which would + // silently skip server verification). See Conn.derpTLSConfig. + if pair := c.derpTLSConfig.Load(); pair != nil && pair.cfg != nil { + dc.TLSConfig = pair.cfg + dc.TLSConfigBypassesTLSDial = pair.bypassTLSDial } header := c.derpHeader.Load() if header != nil { diff --git a/wgengine/magicsock/derp_tls_pair_test.go b/wgengine/magicsock/derp_tls_pair_test.go new file mode 100644 index 0000000000000..c675edf42b496 --- /dev/null +++ b/wgengine/magicsock/derp_tls_pair_test.go @@ -0,0 +1,107 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package magicsock + +import ( + "crypto/tls" + "sync" + "testing" +) + +// loadDERPTLSPair reads the (cfg, bypass) pair the way magicsock/derp.go +// reads it on every reconnect. Used by tests to assert that paired writers +// never expose an inconsistent (cfg, bypass) pairing to readers. +func (c *Conn) loadDERPTLSPair() (cfg *tls.Config, bypass bool) { + if p := c.derpTLSConfig.Load(); p != nil { + return p.cfg, p.bypassTLSDial + } + return nil, false +} + +func TestSetDERPTLSConfigPair(t *testing.T) { + t.Run("legacy SetDERPTLSConfig: bypass false, cfg set", func(t *testing.T) { + c := &Conn{} + want := &tls.Config{} + c.SetDERPTLSConfig(want) + got, bypass := c.loadDERPTLSPair() + if got != want { + t.Fatalf("cfg: got %p, want %p", got, want) + } + if bypass { + t.Fatal("bypass should default to false for SetDERPTLSConfig") + } + }) + + t.Run("SetDERPTLSConfigWithBypass updates pair atomically", func(t *testing.T) { + c := &Conn{} + want := &tls.Config{} + c.SetDERPTLSConfigWithBypass(want, true) + got, bypass := c.loadDERPTLSPair() + if got != want { + t.Fatalf("cfg: got %p, want %p", got, want) + } + if !bypass { + t.Fatal("bypass should be true") + } + + // Switch to a different cfg with bypass=false in one call. + other := &tls.Config{} + c.SetDERPTLSConfigWithBypass(other, false) + got, bypass = c.loadDERPTLSPair() + if got != other { + t.Fatalf("cfg after second set: got %p, want %p", got, other) + } + if bypass { + t.Fatal("bypass should be false after second set") + } + }) + + t.Run("readers never observe inconsistent pairing under concurrent writers", func(t *testing.T) { + // Two writers race: one keeps flipping between (publicCfg, false) + // and (metatronCfg, true). A reader observes the pair atomically + // on every iteration. The invariant: any time we observe + // metatronCfg we must observe bypass=true; any time we observe + // publicCfg we must observe bypass=false. If the (cfg, bypass) + // pair were stored as two independent atomics, this test would + // fail with -race because we'd occasionally observe a + // half-updated pairing. + c := &Conn{} + publicCfg := &tls.Config{} + metatronCfg := &tls.Config{InsecureSkipVerify: true} + c.SetDERPTLSConfigWithBypass(publicCfg, false) + + const iterations = 5_000 + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + for i := 0; i < iterations; i++ { + if i%2 == 0 { + c.SetDERPTLSConfigWithBypass(metatronCfg, true) + } else { + c.SetDERPTLSConfigWithBypass(publicCfg, false) + } + } + }() + go func() { + defer wg.Done() + for i := 0; i < iterations; i++ { + cfg, bypass := c.loadDERPTLSPair() + switch cfg { + case publicCfg: + if bypass { + t.Errorf("invariant violated: publicCfg paired with bypass=true (iter %d)", i) + return + } + case metatronCfg: + if !bypass { + t.Errorf("invariant violated: metatronCfg paired with bypass=false (iter %d)", i) + return + } + } + } + }() + wg.Wait() + }) +} diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 1349add573439..b860df089812b 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -76,6 +76,15 @@ const ( socketBufferSize = 7 << 20 ) +// derpTLSPair holds the TLS config and the bypass-tlsdial flag for DERP +// connections as a single atomic-pointer payload, so the DERP client +// constructor can observe both as a coherent pair. See +// Conn.SetDERPTLSConfigWithBypass. +type derpTLSPair struct { + cfg *tls.Config + bypassTLSDial bool +} + // A Conn routes UDP packets and actively manages a list of its endpoints. type Conn struct { // This block mirrors the contents and field order of the Options @@ -177,8 +186,12 @@ type Conn struct { // derpRegionDialer is passed to the DERP client derpRegionDialer atomic.Pointer[func(ctx context.Context, region *tailcfg.DERPRegion) net.Conn] - // derpTLSConfig is an optional TLS config for DERP connections. - derpTLSConfig atomic.Pointer[tls.Config] + // derpTLSConfig holds the TLS config and the bypass-tlsdial flag for + // DERP connections as a single atomic-pointer payload, so a reconnect + // can observe both as a coherent pair. It is updated by + // SetDERPTLSConfig and SetDERPTLSConfigWithBypass, and read by the + // DERP client constructor in magicsock/derp.go. + derpTLSConfig atomic.Pointer[derpTLSPair] // stats maintains per-connection counters. stats atomic.Pointer[connstats.Statistics] @@ -468,6 +481,13 @@ func NewConn(opts Options) (*Conn, error) { } return h.Clone() }, + GetDERPTLSConfig: func() (*tls.Config, bool) { + pair := c.derpTLSConfig.Load() + if pair == nil { + return nil, false + } + return pair.cfg, pair.bypassTLSDial + }, } c.ignoreSTUNPackets() @@ -1763,8 +1783,28 @@ func (c *Conn) SetDERPForceWebsockets(v bool) { c.derpForceWebsockets.Store(v) } +// SetDERPTLSConfig sets the TLS config used for DERP connections. The +// supplied config will be wrapped via tlsdial.Config when consumed. Use +// SetDERPTLSConfigWithBypass instead when the config performs its own +// server verification (e.g. custom CAs / mTLS frameworks that set +// InsecureSkipVerify=true and a VerifyPeerCertificate callback) — those +// configs cause tlsdial.Config to panic. +// +// SetDERPTLSConfig and SetDERPTLSConfigWithBypass update the (cfg, bypass) +// pair atomically with respect to readers in magicsock/derp.go. func (c *Conn) SetDERPTLSConfig(cfg *tls.Config) { - c.derpTLSConfig.Store(cfg) + c.derpTLSConfig.Store(&derpTLSPair{cfg: cfg, bypassTLSDial: false}) +} + +// SetDERPTLSConfigWithBypass atomically updates both the DERP TLS config +// and the bypass-tlsdial flag. This is the recommended setter when +// bypass=true: there is no observable intermediate state where the new +// custom-verifier config is paired with the old bypass=false flag (which +// would panic inside tlsdial.Config), nor any state where the new +// publicly-trusted config is paired with a stale bypass=true (which would +// silently skip server verification). +func (c *Conn) SetDERPTLSConfigWithBypass(cfg *tls.Config, bypassTLSDial bool) { + c.derpTLSConfig.Store(&derpTLSPair{cfg: cfg, bypassTLSDial: bypassTLSDial}) } func (c *Conn) SetDERPRegionDialer(dialer func(ctx context.Context, region *tailcfg.DERPRegion) net.Conn) {