wgengine/netstack: add debug page for TCP forwarder
Adds a debug page to help debug issues with subnet routers in userspace mode — for example, hitting the max inflight limit. Updates tailscale/corp#12184 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: Idd922f4ae37695f6598a914c2d050574755ef309
This commit is contained in:
parent
50fb8b9123
commit
35dc1fea72
|
@ -508,7 +508,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
|||
hash/fnv from tailscale.com/wgengine/magicsock
|
||||
hash/maphash from go4.org/mem
|
||||
html from html/template+
|
||||
html/template from github.com/gorilla/csrf
|
||||
html/template from github.com/gorilla/csrf+
|
||||
io from archive/tar+
|
||||
io/fs from archive/tar+
|
||||
io/ioutil from github.com/aws/aws-sdk-go-v2/aws/protocol/query+
|
||||
|
|
|
@ -755,6 +755,8 @@ func newNetstack(logf logger.Logf, sys *tsd.System) (*netstack.Impl, error) {
|
|||
// Only register debug info if we have a debug mux
|
||||
if debugMux != nil {
|
||||
expvar.Publish("netstack", ret.ExpVar())
|
||||
|
||||
debugMux.HandleFunc("/debug/netstack/tcp-forwarder", ret.DebugTCPForwarder)
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package netstack
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
xmaps "golang.org/x/exp/maps"
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
)
|
||||
|
||||
// tcpForwarderTemplate is the HTML page rendered by DebugTCPForwarder.
//
// It is executed with a value that exposes two fields:
//   - Stats: a list of items with Key and Value fields, rendered as one row
//     each in the "TCP Statistics" table.
//   - InFlightDials: a list of items with Start (a time.Time, since the
//     template calls its Format method), DurationSecs (formatted with
//     "%.2f"), ClientIP and RemoteAddr fields, rendered as one row each in
//     the "In-Flight Outbound Connections" table.
//
// The document starts with a doctype so browsers render it in standards mode,
// and the font size carries an explicit unit because a unitless non-zero
// length is not valid CSS.
var tcpForwarderTemplate = template.Must(template.New("").Parse(`<!DOCTYPE html>
<html>
	<head>
		<meta name="viewport" content="width=device-width, initial-scale=1">
		<style>
			body { font-family: monospace; font-size: 12px; }
			td { padding: 0.3em; }
		</style>
	</head>
	<body>
		<h1>TCP Forwarder</h1>

		<h2>TCP Statistics</h2>
		<table border=1>
			<tr>
				<th>Metric</th>
				<th>Value</th>
			</tr>
			{{ range .Stats }}
			<tr><td>{{ .Key }}</td><td>{{ .Value }}</td></tr>
			{{ end }}
		</table>

		<h2>In-Flight Outbound Connections</h2>
		<table border=1>
			<tr>
				<th>Start Time</th>
				<th>Client IP</th>
				<th>Remote IP</th>
			</tr>
			{{ range .InFlightDials }}
			<tr>
				<td>{{ .Start.Format "2006-01-02T15:04:05Z07:00" }} ({{ printf "%.2f" .DurationSecs }} seconds ago)</td>
				<td>{{ .ClientIP }}</td>
				<td>{{ .RemoteAddr }}</td>
			</tr>
			{{ end }}
		</table>
	</body>
</html>
`))
|
||||
|
||||
// DebugTCPForwarder writes debug information about this netstack
|
||||
// implementation's current TCP forwarder in HTML format.
|
||||
func (ns *Impl) DebugTCPForwarder(w http.ResponseWriter, r *http.Request) {
|
||||
// Grab data while holding the mutex
|
||||
ns.tcpDebugMu.Lock()
|
||||
tcpDials := xmaps.Values(ns.inFlightDials)
|
||||
ns.tcpDebugMu.Unlock()
|
||||
|
||||
slices.SortFunc(tcpDials, func(a, b tcpDialInfo) int {
|
||||
return a.start.Compare(b.start)
|
||||
})
|
||||
|
||||
type templateDataStats struct {
|
||||
Key string
|
||||
Value uint64
|
||||
}
|
||||
type templateDataDial struct {
|
||||
Start time.Time
|
||||
DurationSecs float64
|
||||
ClientIP netip.Addr
|
||||
RemoteAddr netip.AddrPort
|
||||
}
|
||||
type templateData struct {
|
||||
Stats []templateDataStats
|
||||
InFlightDials []templateDataDial
|
||||
}
|
||||
|
||||
var data templateData
|
||||
|
||||
// Statistics from gVisor
|
||||
tcpStats := ns.ipstack.Stats().TCP
|
||||
tcpMetrics := []struct {
|
||||
name string
|
||||
field *tcpip.StatCounter
|
||||
}{
|
||||
{"Active Connection Openings", tcpStats.ActiveConnectionOpenings},
|
||||
{"Passive Connection Openings", tcpStats.PassiveConnectionOpenings},
|
||||
{"Established Connections", tcpStats.CurrentEstablished},
|
||||
{"Connected Connections", tcpStats.CurrentConnected},
|
||||
{"Dropped In-Flight Forwarder Connections", tcpStats.ForwardMaxInFlightDrop},
|
||||
{"Established Resets", tcpStats.EstablishedResets},
|
||||
{"Established Timeout", tcpStats.EstablishedTimedout},
|
||||
{"Failed Connection Attempts", tcpStats.FailedConnectionAttempts},
|
||||
{"Retransmits", tcpStats.Retransmits},
|
||||
{"Timeouts", tcpStats.Timeouts},
|
||||
{"Checksum Errors", tcpStats.ChecksumErrors},
|
||||
{"Failed Port Reservations", tcpStats.FailedPortReservations},
|
||||
}
|
||||
for _, metric := range tcpMetrics {
|
||||
data.Stats = append(data.Stats, templateDataStats{
|
||||
Key: metric.name,
|
||||
Value: metric.field.Value(),
|
||||
})
|
||||
}
|
||||
|
||||
// Any in-flight DialContext calls in the TCP forwarding path.
|
||||
now := time.Now()
|
||||
for _, dial := range tcpDials {
|
||||
elapsed := now.Sub(dial.start)
|
||||
data.InFlightDials = append(data.InFlightDials, templateDataDial{
|
||||
Start: dial.start,
|
||||
DurationSecs: elapsed.Seconds(),
|
||||
ClientIP: dial.clientRemoteIP,
|
||||
RemoteAddr: dial.dialAddr,
|
||||
})
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
tcpForwarderTemplate.Execute(w, &data)
|
||||
}
|
|
@ -150,6 +150,18 @@ type Impl struct {
|
|||
// TCP connections, so they can be unregistered when connections are
|
||||
// closed.
|
||||
connsOpenBySubnetIP map[netip.Addr]int
|
||||
|
||||
// Debug information for the TCP forwarding code; all fields protected
|
||||
// by tcpDebugMu.
|
||||
tcpDebugMu sync.Mutex
|
||||
inFlightDialCtr int
|
||||
inFlightDials map[int]tcpDialInfo // keyed by a random integer
|
||||
}
|
||||
|
||||
// tcpDialInfo describes one outbound dial made by the TCP forwarding code.
// Values are kept in Impl.inFlightDials and are read by the TCP forwarder
// debug page.
type tcpDialInfo struct {
	// clientRemoteIP is the client IP that was passed in when the dial was
	// recorded; the debug page shows it as "Client IP".
	clientRemoteIP netip.Addr

	// dialAddr is the address and port being dialed; the debug page shows
	// it as "Remote IP".
	dialAddr netip.AddrPort

	// start is the time at which the dial was recorded.
	start time.Time
}
|
||||
|
||||
const nicID = 1
|
||||
|
@ -242,6 +254,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
|
|||
connsOpenBySubnetIP: make(map[netip.Addr]int),
|
||||
dns: dns,
|
||||
tailFSForLocal: tailFSForLocal,
|
||||
inFlightDials: make(map[int]tcpDialInfo),
|
||||
}
|
||||
ns.ctx, ns.ctxCancel = context.WithCancel(context.Background())
|
||||
ns.atomicIsLocalIPFunc.Store(tsaddr.FalseContainsIPFunc())
|
||||
|
@ -981,6 +994,24 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) {
|
|||
}
|
||||
}
|
||||
|
||||
func (ns *Impl) debugForwardedTCP(clientRemoteIP netip.Addr, remote netip.AddrPort) func() {
|
||||
ns.tcpDebugMu.Lock()
|
||||
debugKey := ns.inFlightDialCtr
|
||||
ns.inFlightDialCtr++
|
||||
ns.inFlightDials[debugKey] = tcpDialInfo{
|
||||
clientRemoteIP: clientRemoteIP,
|
||||
dialAddr: remote,
|
||||
start: time.Now(),
|
||||
}
|
||||
ns.tcpDebugMu.Unlock()
|
||||
|
||||
return func() {
|
||||
ns.tcpDebugMu.Lock()
|
||||
delete(ns.inFlightDials, debugKey)
|
||||
ns.tcpDebugMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.TCPConn, clientRemoteIP netip.Addr, wq *waiter.Queue, dialAddr netip.AddrPort) (handled bool) {
|
||||
dialAddrStr := dialAddr.String()
|
||||
if debugNetstack() {
|
||||
|
@ -1008,9 +1039,13 @@ func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.
|
|||
cancel()
|
||||
}()
|
||||
|
||||
// Insert debug info, and remove it once we've dialed our outbound conn.
|
||||
debugDialDone := ns.debugForwardedTCP(clientRemoteIP, dialAddr)
|
||||
|
||||
// Attempt to dial the outbound connection before we accept the inbound one.
|
||||
var stdDialer net.Dialer
|
||||
server, err := stdDialer.DialContext(ctx, "tcp", dialAddrStr)
|
||||
debugDialDone()
|
||||
if err != nil {
|
||||
ns.logf("netstack: could not connect to local server at %s: %v", dialAddr.String(), err)
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue