@@ -6,8 +6,12 @@ import (
66 "io"
77 "net"
88 "os"
9+ "os/exec"
910 "strconv"
1011 "strings"
12+ "sync"
13+ "syscall"
14+ "time"
1115
1216 "golang.org/x/sys/unix"
1317
@@ -25,7 +29,8 @@ func NewDriver(logWriter io.Writer) port.ChildDriver {
2529}
2630
// childDriver holds the state shared by the child-side driver methods.
// NOTE(review): presumably the concrete type returned by NewDriver as a
// port.ChildDriver — confirm against the constructor body.
//
// It contains a sync.Once and therefore must not be copied after first use;
// its methods use pointer receivers.
type childDriver struct {
	logWriter    io.Writer // destination for diagnostic messages emitted by the driver
	routingSetup sync.Once // guards the one-time call to setupTransparentRouting
}
3035
3136func (d * childDriver ) RunChildDriver (opaque map [string ]string , quit <- chan struct {}, detachedNetNSPath string ) error {
@@ -119,7 +124,6 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
119124 }
120125 // dialProto does not need "4", "6" suffix
121126 dialProto := strings .TrimSuffix (strings .TrimSuffix (req .Proto , "6" ), "4" )
122- var dialer net.Dialer
123127 ip := req .IP
124128 if ip == "" {
125129 ip = "127.0.0.1"
@@ -135,9 +139,24 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
135139 }
136140 ip = p .String ()
137141 }
138- targetConn , err := dialer .Dial (dialProto , net .JoinHostPort (ip , strconv .Itoa (req .Port )))
139- if err != nil {
140- return err
142+ targetAddr := net .JoinHostPort (ip , strconv .Itoa (req .Port ))
143+
144+ var targetConn net.Conn
145+ var err error
146+ if req .SourceIP != "" && req .SourcePort != 0 && dialProto == "tcp" && ! net .ParseIP (req .SourceIP ).IsLoopback () {
147+ d .routingSetup .Do (func () { d .setupTransparentRouting () })
148+ targetConn , err = transparentDial (dialProto , targetAddr , req .SourceIP , req .SourcePort )
149+ if err != nil {
150+ fmt .Fprintf (d .logWriter , "transparent dial failed, falling back: %v\n " , err )
151+ targetConn , err = nil , nil
152+ }
153+ }
154+ if targetConn == nil {
155+ var dialer net.Dialer
156+ targetConn , err = dialer .Dial (dialProto , targetAddr )
157+ if err != nil {
158+ return err
159+ }
141160 }
142161 defer targetConn .Close () // no effect on duplicated FD
143162 targetConnFiler , ok := targetConn .(filer )
@@ -164,6 +183,72 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
164183 return err
165184}
166185
186+ // setupTransparentRouting sets up policy routing so that response packets
187+ // destined to transparent-bound source IPs are delivered locally.
188+ //
189+ // Transparent sockets (IP_TRANSPARENT) bind to non-local addresses (the real
190+ // client IP). Response packets to these addresses must be routed locally instead
191+ // of being sent out through the TAP device (slirp4netns).
192+ //
193+ // The transparent SYN goes through OUTPUT (where we tag it with CONNMARK) and
194+ // then either:
195+ //
196+ // 1. Gets DNAT'd to the container (nerdctl/CNI): the SYN-ACK arrives via the
197+ // bridge in PREROUTING, where we restore connmark to fwmark.
198+ //
199+ // 2. Goes through loopback to a userspace proxy like docker-proxy: the SYN
200+ // enters PREROUTING on loopback with connmark, which sets fwmark. With
201+ // tcp_fwmark_accept=1, the accepted socket inherits the fwmark. The proxy's
202+ // SYN-ACK is then routed via the fwmark table (local delivery) instead of
203+ // the default route (TAP), allowing it to reach the transparent socket.
204+ func (d * childDriver ) setupTransparentRouting () {
205+ cmds := [][]string {
206+ // Table 100: treat all addresses as local (for delivery to transparent sockets)
207+ {"ip" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" , "100" },
208+ {"ip" , "-6" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" , "100" },
209+ // Route fwmark-100 packets via table 100
210+ {"ip" , "rule" , "add" , "fwmark" , "100" , "lookup" , "100" , "priority" , "100" },
211+ {"ip" , "-6" , "rule" , "add" , "fwmark" , "100" , "lookup" , "100" , "priority" , "100" },
212+ // Inherit fwmark from SYN to accepted socket (needed for userspace proxies
213+ // like docker-proxy, so that SYN-ACK routing uses table 100)
214+ {"sysctl" , "-w" , "net.ipv4.tcp_fwmark_accept=1" },
215+ // In OUTPUT: tag transparent connections (non-local source) with CONNMARK
216+ {"iptables" , "-t" , "mangle" , "-A" , "OUTPUT" , "-p" , "tcp" , "-m" , "addrtype" , "!" , "--src-type" , "LOCAL" , "-j" , "CONNMARK" , "--set-mark" , "100" },
217+ {"ip6tables" , "-t" , "mangle" , "-A" , "OUTPUT" , "-p" , "tcp" , "-m" , "addrtype" , "!" , "--src-type" , "LOCAL" , "-j" , "CONNMARK" , "--set-mark" , "100" },
218+ // In PREROUTING: restore connmark to fwmark for routing
219+ {"iptables" , "-t" , "mangle" , "-A" , "PREROUTING" , "-p" , "tcp" , "-m" , "connmark" , "--mark" , "100" , "-j" , "MARK" , "--set-mark" , "100" },
220+ {"ip6tables" , "-t" , "mangle" , "-A" , "PREROUTING" , "-p" , "tcp" , "-m" , "connmark" , "--mark" , "100" , "-j" , "MARK" , "--set-mark" , "100" },
221+ }
222+ for _ , args := range cmds {
223+ if out , err := exec .Command (args [0 ], args [1 :]... ).CombinedOutput (); err != nil {
224+ fmt .Fprintf (d .logWriter , "transparent routing setup: %v: %s\n " , err , out )
225+ }
226+ }
227+ }
228+
229+ // transparentDial dials targetAddr using IP_TRANSPARENT, binding to the given
230+ // source IP and port so the backend service sees the real client address.
231+ func transparentDial (dialProto , targetAddr , sourceIP string , sourcePort int ) (net.Conn , error ) {
232+ dialer := net.Dialer {
233+ Timeout : time .Second ,
234+ LocalAddr : & net.TCPAddr {IP : net .ParseIP (sourceIP ), Port : sourcePort },
235+ Control : func (network , address string , c syscall.RawConn ) error {
236+ var sockErr error
237+ if err := c .Control (func (fd uintptr ) {
238+ if strings .Contains (network , "6" ) {
239+ sockErr = unix .SetsockoptInt (int (fd ), unix .SOL_IPV6 , unix .IPV6_TRANSPARENT , 1 )
240+ } else {
241+ sockErr = unix .SetsockoptInt (int (fd ), unix .SOL_IP , unix .IP_TRANSPARENT , 1 )
242+ }
243+ }); err != nil {
244+ return err
245+ }
246+ return sockErr
247+ },
248+ }
249+ return dialer .Dial (dialProto , targetAddr )
250+ }
251+
167252// filer is implemented by *net.TCPConn and *net.UDPConn
168253type filer interface {
169254 File () (f * os.File , err error )
0 commit comments