wgengine/netstack: add debug page for TCP forwarder

To help in debugging issues with subnet routers in userspace mode–for
example, hitting the max inflight limit.

Updates tailscale/corp#12184

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: Idd922f4ae37695f6598a914c2d050574755ef309
This commit is contained in:
Andrew Dunham 2024-02-26 14:34:56 -05:00
parent 50fb8b9123
commit 35dc1fea72
4 changed files with 167 additions and 1 deletions

View File

@ -508,7 +508,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
hash/fnv from tailscale.com/wgengine/magicsock
hash/maphash from go4.org/mem
html from html/template+
html/template from github.com/gorilla/csrf
html/template from github.com/gorilla/csrf+
io from archive/tar+
io/fs from archive/tar+
io/ioutil from github.com/aws/aws-sdk-go-v2/aws/protocol/query+

View File

@ -755,6 +755,8 @@ func newNetstack(logf logger.Logf, sys *tsd.System) (*netstack.Impl, error) {
// Only register debug info if we have a debug mux
if debugMux != nil {
expvar.Publish("netstack", ret.ExpVar())
debugMux.HandleFunc("/debug/netstack/tcp-forwarder", ret.DebugTCPForwarder)
}
return ret, nil
}

129
wgengine/netstack/debug.go Normal file
View File

@ -0,0 +1,129 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package netstack
import (
"html/template"
"net/http"
"net/netip"
"slices"
"time"
xmaps "golang.org/x/exp/maps"
"gvisor.dev/gvisor/pkg/tcpip"
)
var tcpForwarderTemplate = template.Must(template.New("").Parse(`
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body { font-family: monospace; font-size: 12; }
td { padding: 0.3em; }
</style>
</head>
<body>
<h1>TCP Forwarder</h1>
<h2>TCP Statistics</h2>
<table border=1>
<tr>
<th>Metric</th>
<th>Value</th>
</tr>
{{ range .Stats }}
<tr><td>{{ .Key }}</td><td>{{ .Value }}</td></tr>
{{ end }}
</table>
<h2>In-Flight Outbound Connections</h2>
<table border=1>
<tr>
<th>Start Time</th>
<th>Client IP</th>
<th>Remote IP</th>
</tr>
{{ range .InFlightDials }}
<tr>
<td>{{ .Start.Format "2006-01-02T15:04:05Z07:00" }} ({{ printf "%.2f" .DurationSecs }} seconds ago)</td>
<td>{{ .ClientIP }}</td>
<td>{{ .RemoteAddr }}</td>
</tr>
{{ end }}
</table>
</body>
</html>
`))
// DebugTCPForwarder writes debug information about this netstack
// implementation's current TCP forwarder in HTML format.
func (ns *Impl) DebugTCPForwarder(w http.ResponseWriter, r *http.Request) {
// Grab data while holding the mutex
ns.tcpDebugMu.Lock()
tcpDials := xmaps.Values(ns.inFlightDials)
ns.tcpDebugMu.Unlock()
slices.SortFunc(tcpDials, func(a, b tcpDialInfo) int {
return a.start.Compare(b.start)
})
type templateDataStats struct {
Key string
Value uint64
}
type templateDataDial struct {
Start time.Time
DurationSecs float64
ClientIP netip.Addr
RemoteAddr netip.AddrPort
}
type templateData struct {
Stats []templateDataStats
InFlightDials []templateDataDial
}
var data templateData
// Statistics from gVisor
tcpStats := ns.ipstack.Stats().TCP
tcpMetrics := []struct {
name string
field *tcpip.StatCounter
}{
{"Active Connection Openings", tcpStats.ActiveConnectionOpenings},
{"Passive Connection Openings", tcpStats.PassiveConnectionOpenings},
{"Established Connections", tcpStats.CurrentEstablished},
{"Connected Connections", tcpStats.CurrentConnected},
{"Dropped In-Flight Forwarder Connections", tcpStats.ForwardMaxInFlightDrop},
{"Established Resets", tcpStats.EstablishedResets},
{"Established Timeout", tcpStats.EstablishedTimedout},
{"Failed Connection Attempts", tcpStats.FailedConnectionAttempts},
{"Retransmits", tcpStats.Retransmits},
{"Timeouts", tcpStats.Timeouts},
{"Checksum Errors", tcpStats.ChecksumErrors},
{"Failed Port Reservations", tcpStats.FailedPortReservations},
}
for _, metric := range tcpMetrics {
data.Stats = append(data.Stats, templateDataStats{
Key: metric.name,
Value: metric.field.Value(),
})
}
// Any in-flight DialContext calls in the TCP forwarding path.
now := time.Now()
for _, dial := range tcpDials {
elapsed := now.Sub(dial.start)
data.InFlightDials = append(data.InFlightDials, templateDataDial{
Start: dial.start,
DurationSecs: elapsed.Seconds(),
ClientIP: dial.clientRemoteIP,
RemoteAddr: dial.dialAddr,
})
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.Header().Set("Cache-Control", "no-cache")
tcpForwarderTemplate.Execute(w, &data)
}

View File

@ -150,6 +150,18 @@ type Impl struct {
// TCP connections, so they can be unregistered when connections are
// closed.
connsOpenBySubnetIP map[netip.Addr]int
// Debug information for the TCP forwarding code; all fields protected
// by tcpDebugMu.
tcpDebugMu sync.Mutex
inFlightDialCtr int
inFlightDials map[int]tcpDialInfo // keyed by a random integer
}
type tcpDialInfo struct {
clientRemoteIP netip.Addr
dialAddr netip.AddrPort
start time.Time
}
const nicID = 1
@ -242,6 +254,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
connsOpenBySubnetIP: make(map[netip.Addr]int),
dns: dns,
tailFSForLocal: tailFSForLocal,
inFlightDials: make(map[int]tcpDialInfo),
}
ns.ctx, ns.ctxCancel = context.WithCancel(context.Background())
ns.atomicIsLocalIPFunc.Store(tsaddr.FalseContainsIPFunc())
@ -981,6 +994,24 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) {
}
}
func (ns *Impl) debugForwardedTCP(clientRemoteIP netip.Addr, remote netip.AddrPort) func() {
ns.tcpDebugMu.Lock()
debugKey := ns.inFlightDialCtr
ns.inFlightDialCtr++
ns.inFlightDials[debugKey] = tcpDialInfo{
clientRemoteIP: clientRemoteIP,
dialAddr: remote,
start: time.Now(),
}
ns.tcpDebugMu.Unlock()
return func() {
ns.tcpDebugMu.Lock()
delete(ns.inFlightDials, debugKey)
ns.tcpDebugMu.Unlock()
}
}
func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.TCPConn, clientRemoteIP netip.Addr, wq *waiter.Queue, dialAddr netip.AddrPort) (handled bool) {
dialAddrStr := dialAddr.String()
if debugNetstack() {
@ -1008,9 +1039,13 @@ func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.
cancel()
}()
// Insert debug info, and remove it once we've dialed our outbound conn.
debugDialDone := ns.debugForwardedTCP(clientRemoteIP, dialAddr)
// Attempt to dial the outbound connection before we accept the inbound one.
var stdDialer net.Dialer
server, err := stdDialer.DialContext(ctx, "tcp", dialAddrStr)
debugDialDone()
if err != nil {
ns.logf("netstack: could not connect to local server at %s: %v", dialAddr.String(), err)
return