Skip to content

Commit c9fdbaf

Browse files
AkihiroSuda and claude committed
Preserve real client source IP in builtin port driver via IP_TRANSPARENT
Use IP_TRANSPARENT socket option in the child process to bind outgoing connections to the real client IP:port, so backend services see the original source address instead of 127.0.0.1.

This leverages CAP_NET_ADMIN in the user namespace (Linux 4.18+) and policy routing to complete TCP handshakes without iptables. Falls back gracefully to normal dial on older kernels or when routing setup fails.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
1 parent ad75008 commit c9fdbaf

File tree

6 files changed

+352
-15
lines changed

6 files changed

+352
-15
lines changed

pkg/port/builtin/builtin_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@ func TestBuiltIn(t *testing.T) {
2929
return d
3030
}
3131
testsuite.Run(t, pf)
32+
testsuite.RunTCPTransparent(t, pf)
3233
}

pkg/port/builtin/child/child.go

Lines changed: 90 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ import (
66
"io"
77
"net"
88
"os"
9+
"os/exec"
910
"strconv"
1011
"strings"
12+
"sync"
13+
"syscall"
14+
"time"
1115

1216
"golang.org/x/sys/unix"
1317

@@ -25,7 +29,8 @@ func NewDriver(logWriter io.Writer) port.ChildDriver {
2529
}
2630

2731
type childDriver struct {
28-
logWriter io.Writer
32+
logWriter io.Writer
33+
routingSetup sync.Once
2934
}
3035

3136
func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}, detachedNetNSPath string) error {
@@ -119,7 +124,6 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
119124
}
120125
// dialProto does not need "4", "6" suffix
121126
dialProto := strings.TrimSuffix(strings.TrimSuffix(req.Proto, "6"), "4")
122-
var dialer net.Dialer
123127
ip := req.IP
124128
if ip == "" {
125129
ip = "127.0.0.1"
@@ -135,9 +139,24 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
135139
}
136140
ip = p.String()
137141
}
138-
targetConn, err := dialer.Dial(dialProto, net.JoinHostPort(ip, strconv.Itoa(req.Port)))
139-
if err != nil {
140-
return err
142+
targetAddr := net.JoinHostPort(ip, strconv.Itoa(req.Port))
143+
144+
var targetConn net.Conn
145+
var err error
146+
if req.SourceIP != "" && req.SourcePort != 0 && dialProto == "tcp" && !net.ParseIP(req.SourceIP).IsLoopback() {
147+
d.routingSetup.Do(func() { d.setupTransparentRouting() })
148+
targetConn, err = transparentDial(dialProto, targetAddr, req.SourceIP, req.SourcePort)
149+
if err != nil {
150+
fmt.Fprintf(d.logWriter, "transparent dial failed, falling back: %v\n", err)
151+
targetConn, err = nil, nil
152+
}
153+
}
154+
if targetConn == nil {
155+
var dialer net.Dialer
156+
targetConn, err = dialer.Dial(dialProto, targetAddr)
157+
if err != nil {
158+
return err
159+
}
141160
}
142161
defer targetConn.Close() // no effect on duplicated FD
143162
targetConnFiler, ok := targetConn.(filer)
@@ -164,6 +183,72 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
164183
return err
165184
}
166185

186+
// setupTransparentRouting sets up policy routing so that response packets
187+
// destined to transparent-bound source IPs are delivered locally.
188+
//
189+
// Transparent sockets (IP_TRANSPARENT) bind to non-local addresses (the real
190+
// client IP). Response packets to these addresses must be routed locally instead
191+
// of being sent out through the TAP device (slirp4netns).
192+
//
193+
// The transparent SYN goes through OUTPUT (where we tag it with CONNMARK) and
194+
// then either:
195+
//
196+
// 1. Gets DNAT'd to the container (nerdctl/CNI): the SYN-ACK arrives via the
197+
// bridge in PREROUTING, where we restore connmark to fwmark.
198+
//
199+
// 2. Goes through loopback to a userspace proxy like docker-proxy: the SYN
200+
// enters PREROUTING on loopback with connmark, which sets fwmark. With
201+
// tcp_fwmark_accept=1, the accepted socket inherits the fwmark. The proxy's
202+
// SYN-ACK is then routed via the fwmark table (local delivery) instead of
203+
// the default route (TAP), allowing it to reach the transparent socket.
204+
func (d *childDriver) setupTransparentRouting() {
205+
cmds := [][]string{
206+
// Table 100: treat all addresses as local (for delivery to transparent sockets)
207+
{"ip", "route", "add", "local", "default", "dev", "lo", "table", "100"},
208+
{"ip", "-6", "route", "add", "local", "default", "dev", "lo", "table", "100"},
209+
// Route fwmark-100 packets via table 100
210+
{"ip", "rule", "add", "fwmark", "100", "lookup", "100", "priority", "100"},
211+
{"ip", "-6", "rule", "add", "fwmark", "100", "lookup", "100", "priority", "100"},
212+
// Inherit fwmark from SYN to accepted socket (needed for userspace proxies
213+
// like docker-proxy, so that SYN-ACK routing uses table 100)
214+
{"sysctl", "-w", "net.ipv4.tcp_fwmark_accept=1"},
215+
// In OUTPUT: tag transparent connections (non-local source) with CONNMARK
216+
{"iptables", "-t", "mangle", "-A", "OUTPUT", "-p", "tcp", "-m", "addrtype", "!", "--src-type", "LOCAL", "-j", "CONNMARK", "--set-mark", "100"},
217+
{"ip6tables", "-t", "mangle", "-A", "OUTPUT", "-p", "tcp", "-m", "addrtype", "!", "--src-type", "LOCAL", "-j", "CONNMARK", "--set-mark", "100"},
218+
// In PREROUTING: restore connmark to fwmark for routing
219+
{"iptables", "-t", "mangle", "-A", "PREROUTING", "-p", "tcp", "-m", "connmark", "--mark", "100", "-j", "MARK", "--set-mark", "100"},
220+
{"ip6tables", "-t", "mangle", "-A", "PREROUTING", "-p", "tcp", "-m", "connmark", "--mark", "100", "-j", "MARK", "--set-mark", "100"},
221+
}
222+
for _, args := range cmds {
223+
if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
224+
fmt.Fprintf(d.logWriter, "transparent routing setup: %v: %s\n", err, out)
225+
}
226+
}
227+
}
228+
229+
// transparentDial dials targetAddr using IP_TRANSPARENT, binding to the given
230+
// source IP and port so the backend service sees the real client address.
231+
func transparentDial(dialProto, targetAddr, sourceIP string, sourcePort int) (net.Conn, error) {
232+
dialer := net.Dialer{
233+
Timeout: time.Second,
234+
LocalAddr: &net.TCPAddr{IP: net.ParseIP(sourceIP), Port: sourcePort},
235+
Control: func(network, address string, c syscall.RawConn) error {
236+
var sockErr error
237+
if err := c.Control(func(fd uintptr) {
238+
if strings.Contains(network, "6") {
239+
sockErr = unix.SetsockoptInt(int(fd), unix.SOL_IPV6, unix.IPV6_TRANSPARENT, 1)
240+
} else {
241+
sockErr = unix.SetsockoptInt(int(fd), unix.SOL_IP, unix.IP_TRANSPARENT, 1)
242+
}
243+
}); err != nil {
244+
return err
245+
}
246+
return sockErr
247+
},
248+
}
249+
return dialer.Dial(dialProto, targetAddr)
250+
}
251+
167252
// filer is implemented by *net.TCPConn and *net.UDPConn
168253
type filer interface {
169254
File() (f *os.File, err error)

pkg/port/builtin/msg/msg.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ type Request struct {
2525
Port int
2626
ParentIP string
2727
HostGatewayIP string
28+
SourceIP string `json:",omitempty"` // real client IP for IP_TRANSPARENT
29+
SourcePort int `json:",omitempty"` // real client port for IP_TRANSPARENT
2830
}
2931

3032
// Reply may contain FD as OOB
@@ -69,7 +71,9 @@ func hostGatewayIP() string {
6971

7072
// ConnectToChild connects to the child UNIX socket, and obtains TCP or UDP socket FD
7173
// that corresponds to the port spec.
72-
func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
74+
// sourceAddr is the real client address (e.g., from net.Conn.RemoteAddr()) for IP_TRANSPARENT support.
75+
// Pass nil to skip source IP preservation.
76+
func ConnectToChild(c *net.UnixConn, spec port.Spec, sourceAddr net.Addr) (int, error) {
7377
req := Request{
7478
Type: RequestTypeConnect,
7579
Proto: spec.Proto,
@@ -78,6 +82,10 @@ func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
7882
ParentIP: spec.ParentIP,
7983
HostGatewayIP: hostGatewayIP(),
8084
}
85+
if tcpAddr, ok := sourceAddr.(*net.TCPAddr); ok && tcpAddr != nil {
86+
req.SourceIP = tcpAddr.IP.String()
87+
req.SourcePort = tcpAddr.Port
88+
}
8189
if _, err := lowlevelmsgutil.MarshalToWriter(c, &req); err != nil {
8290
return 0, err
8391
}
@@ -114,21 +122,21 @@ func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
114122
}
115123

116124
// ConnectToChildWithSocketPath wraps ConnectToChild
117-
func ConnectToChildWithSocketPath(socketPath string, spec port.Spec) (int, error) {
125+
func ConnectToChildWithSocketPath(socketPath string, spec port.Spec, sourceAddr net.Addr) (int, error) {
118126
var dialer net.Dialer
119127
conn, err := dialer.Dial("unix", socketPath)
120128
if err != nil {
121129
return 0, err
122130
}
123131
defer conn.Close()
124132
c := conn.(*net.UnixConn)
125-
return ConnectToChild(c, spec)
133+
return ConnectToChild(c, spec, sourceAddr)
126134
}
127135

128136
// ConnectToChildWithRetry retries ConnectToChild every (i*5) milliseconds.
129-
func ConnectToChildWithRetry(socketPath string, spec port.Spec, retries int) (int, error) {
137+
func ConnectToChildWithRetry(socketPath string, spec port.Spec, retries int, sourceAddr net.Addr) (int, error) {
130138
for i := 0; i < retries; i++ {
131-
fd, err := ConnectToChildWithSocketPath(socketPath, spec)
139+
fd, err := ConnectToChildWithSocketPath(socketPath, spec, sourceAddr)
132140
if i == retries-1 && err != nil {
133141
return 0, err
134142
}

pkg/port/builtin/parent/tcp/tcp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh ch
5959
func copyConnToChild(c net.Conn, socketPath string, spec port.Spec, stopCh <-chan struct{}) error {
6060
defer c.Close()
6161
// get fd from the child as an SCM_RIGHTS cmsg
62-
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
62+
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, c.RemoteAddr())
6363
if err != nil {
6464
return err
6565
}

pkg/port/builtin/parent/udp/udp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh ch
2626
Listener: c,
2727
BackendDial: func() (*net.UDPConn, error) {
2828
// get fd from the child as an SCM_RIGHTS cmsg
29-
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
29+
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, nil)
3030
if err != nil {
3131
return nil, err
3232
}

0 commit comments

Comments
 (0)