net/netcheck: add check for captive portal (#5593)
This doesn't change any behaviour for now, other than maybe running a full netcheck more often. The intent is to start gathering data on captive portals; seeing the result in the 'tailscale netcheck' command should also give users a bit of additional information.

Updates #1634

Change-Id: I6ba08f9c584dc0200619fa97f9fde1a319f25c76
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
commit c6162c2a94 (parent aa5e494aba)
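For background, the detection approach here is the common "204 probe": make a plain-HTTP request to a URL that is known to answer 204 No Content, without following redirects, and treat any other response as a sign that a captive portal is intercepting HTTP traffic. Below is a minimal standalone sketch of that idea, illustrative only; the helper name and probe URL are placeholders and are not part of this change:

package main

import (
    "context"
    "fmt"
    "net/http"
    "time"
)

// detectCaptivePortal fetches a URL that should return "204 No Content".
// Any other status (e.g. a redirect to a login page) suggests that
// something on the path is intercepting plain HTTP.
func detectCaptivePortal(ctx context.Context, url string) (bool, error) {
    client := &http.Client{
        // Don't follow redirects; portals typically answer with a 30x.
        CheckRedirect: func(req *http.Request, via []*http.Request) error {
            return http.ErrUseLastResponse
        },
        Timeout: 3 * time.Second,
    }
    req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
    if err != nil {
        return false, err
    }
    resp, err := client.Do(req)
    if err != nil {
        return false, err
    }
    defer resp.Body.Close()
    // Anything other than 204 suggests a captive portal.
    return resp.StatusCode != http.StatusNoContent, nil
}

func main() {
    // Placeholder probe URL; the change below probes a DERP node's /generate_204.
    found, err := detectCaptivePortal(context.Background(), "http://example.com/generate_204")
    fmt.Println(found, err)
}

In the diff below, the probe target is a DERP node's /generate_204 endpoint, redirects are likewise not followed, and the check only runs on full (non-incremental) reports and is cancelled once STUN gets UDP responses, since a working UDP path makes the portal check unnecessary.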
@@ -133,6 +133,9 @@ func printReport(dm *tailcfg.DERPMap, report *netcheck.Report) error {
    printf("\t* MappingVariesByDestIP: %v\n", report.MappingVariesByDestIP)
    printf("\t* HairPinning: %v\n", report.HairPinning)
    printf("\t* PortMapping: %v\n", portMapping(report))
    if report.CaptivePortal != "" {
        printf("\t* CaptivePortal: %v\n", report.CaptivePortal)
    }

    // When DERP latency checking failed,
    // magicsock will try to pick the DERP server that

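With this change, when the report carries a captive portal result, 'tailscale netcheck' prints one extra line in its report. Illustrative output only (not captured from a real run):

    * CaptivePortal: true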
@@ -13,6 +13,7 @@ import (
    "fmt"
    "io"
    "log"
    "math/rand"
    "net"
    "net/http"
    "net/netip"

@@ -112,6 +113,10 @@ type Report struct {
    GlobalV4 string // ip:port of global IPv4
    GlobalV6 string // [ip]:port of global IPv6

    // CaptivePortal is set when we think there's a captive portal that is
    // intercepting HTTP traffic.
    CaptivePortal opt.Bool

    // TODO: update Clone when adding new fields
}

@@ -175,6 +180,10 @@ type Client struct {
    // If nil, portmap discovery is not done.
    PortMapper *portmapper.Client // lazily initialized on first use

    // For tests
    testEnoughRegions      int
    testCaptivePortalDelay time.Duration

    mu       sync.Mutex // guards following
    nextFull bool // do a full region scan, even if last != nil
    prev     map[time.Time]*Report // some previous reports

@@ -192,6 +201,9 @@ type STUNConn interface {
}

func (c *Client) enoughRegions() int {
    if c.testEnoughRegions > 0 {
        return c.testEnoughRegions
    }
    if c.Verbose {
        // Abuse verbose a bit here so netcheck can show all region latencies
        // in verbose mode.

@@ -200,6 +212,14 @@ func (c *Client) enoughRegions() int {
    return 3
}

func (c *Client) captivePortalDelay() time.Duration {
    if c.testCaptivePortalDelay > 0 {
        return c.testCaptivePortalDelay
    }
    // Chosen semi-arbitrarily
    return 200 * time.Millisecond
}

func (c *Client) logf(format string, a ...any) {
    if c.Logf != nil {
        c.Logf(format, a...)

@@ -783,13 +803,35 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
    }
    c.curState = rs
    last := c.last

    // Even if we're doing a non-incremental update, we may want to try our
    // preferred DERP region for captive portal detection. Save that, if we
    // have it.
    var preferredDERP int
    if last != nil {
        preferredDERP = last.PreferredDERP
    }

    now := c.timeNow()

    doFull := false
    if c.nextFull || now.Sub(c.lastFull) > 5*time.Minute {
        doFull = true
    }
    // If the last report had a captive portal and reported no UDP access,
    // it's possible that we didn't get a useful netcheck due to the
    // captive portal blocking us. If so, make this report a full
    // (non-incremental) one.
    if !doFull && last != nil {
        doFull = !last.UDP && last.CaptivePortal.EqualBool(true)
    }
    if doFull {
        last = nil // causes makeProbePlan below to do a full (initial) plan
        c.nextFull = false
        c.lastFull = now
        metricNumGetReportFull.Add(1)
    }

    rs.incremental = last != nil
    c.mu.Unlock()

@@ -874,6 +916,48 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
    plan := makeProbePlan(dm, ifState, last)

    // If we're doing a full probe, also check for a captive portal. We
    // delay by a bit to wait for UDP STUN to finish, to avoid the probe if
    // it's unnecessary.
    captivePortalDone := syncs.ClosedChan()
    captivePortalStop := func() {}
    if !rs.incremental {
        // NOTE(andrew): we can't simply add this goroutine to the
        // `NewWaitGroupChan` below, since we don't wait for that
        // waitgroup to finish when exiting this function and thus get
        // a data race.
        ch := make(chan struct{})
        captivePortalDone = ch

        tmr := time.AfterFunc(c.captivePortalDelay(), func() {
            defer close(ch)
            found, err := c.checkCaptivePortal(ctx, dm, preferredDERP)
            if err != nil {
                c.logf("[v1] checkCaptivePortal: %v", err)
                return
            }
            rs.report.CaptivePortal.Set(found)
        })

        captivePortalStop = func() {
            // Don't cancel our captive portal check if we're
            // explicitly doing a verbose netcheck.
            if c.Verbose {
                return
            }

            if tmr.Stop() {
                // Stopped successfully; need to close the
                // signal channel ourselves.
                close(ch)
                return
            }

            // Did not stop; do nothing and it'll finish by itself
            // and close the signal channel.
        }
    }

    wg := syncs.NewWaitGroupChan()
    wg.Add(len(plan))
    for _, probeSet := range plan {

@@ -894,9 +978,17 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
    case <-stunTimer.C:
    case <-ctx.Done():
    case <-wg.DoneChan():
        // All of our probes finished, so if we have >0 responses, we
        // stop our captive portal check.
        if rs.anyUDP() {
            captivePortalStop()
        }
    case <-rs.stopProbeCh:
        // Saw enough regions.
        c.vlogf("saw enough regions; not waiting for rest")
        // We can stop the captive portal check since we know that we
        // got a bunch of STUN responses.
        captivePortalStop()
    }

    rs.waitHairCheck(ctx)

@@ -965,6 +1057,9 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
        wg.Wait()
    }

    // Wait for captive portal check before finishing the report.
    <-captivePortalDone

    return c.finishAndStoreReport(rs, dm), nil
}

@@ -979,6 +1074,54 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Report {
    return report
}

var noRedirectClient = &http.Client{
    // No redirects allowed
    CheckRedirect: func(req *http.Request, via []*http.Request) error {
        return http.ErrUseLastResponse
    },

    // Remaining fields are the same as the default client.
    Transport: http.DefaultClient.Transport,
    Jar:       http.DefaultClient.Jar,
    Timeout:   http.DefaultClient.Timeout,
}

// checkCaptivePortal reports whether or not we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking if that's what we get.
//
// The boolean return is whether we think we have a captive portal.
func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) {
    defer noRedirectClient.CloseIdleConnections()

    // If we have a preferred DERP region with more than one node, try
    // that; otherwise, pick a random one not marked as "Avoid".
    if preferredDERP == 0 || dm.Regions[preferredDERP] == nil ||
        (preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) {
        rids := make([]int, 0, len(dm.Regions))
        for id, reg := range dm.Regions {
            if reg == nil || reg.Avoid || len(reg.Nodes) == 0 {
                continue
            }
            rids = append(rids, id)
        }
        preferredDERP = rids[rand.Intn(len(rids))]
    }

    node := dm.Regions[preferredDERP].Nodes[0]
    req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil)
    if err != nil {
        return false, err
    }
    r, err := noRedirectClient.Do(req)
    if err != nil {
        return false, err
    }
    c.logf("[v2] checkCaptivePortal url=%q status_code=%d", req.URL.String(), r.StatusCode)

    return r.StatusCode != 204, nil
}

// runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as ws/wasm.
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error {

@@ -1200,6 +1343,9 @@ func (c *Client) logConciseReport(r *Report, dm *tailcfg.DERPMap) {
    if r.GlobalV6 != "" {
        fmt.Fprintf(w, " v6a=%v", r.GlobalV6)
    }
    if r.CaptivePortal != "" {
        fmt.Fprintf(w, " captiveportal=%v", r.CaptivePortal)
    }
    fmt.Fprintf(w, " derp=%v", r.PreferredDERP)
    if r.PreferredDERP != 0 {
        fmt.Fprintf(w, " derpdist=")

@@ -9,11 +9,13 @@ import (
    "context"
    "fmt"
    "net"
    "net/http"
    "net/netip"
    "reflect"
    "sort"
    "strconv"
    "strings"
    "sync/atomic"
    "testing"
    "time"

@@ -115,6 +117,9 @@ func TestWorksWhenUDPBlocked(t *testing.T) {
    // OS IPv6 test is irrelevant here, accept whatever the current
    // machine has.
    want.OSHasIPv6 = r.OSHasIPv6
    // Captive portal test is irrelevant; accept what the current report
    // has.
    want.CaptivePortal = r.CaptivePortal

    if !reflect.DeepEqual(r, want) {
        t.Errorf("mismatch\n got: %+v\nwant: %+v\n", r, want)

@@ -661,3 +666,57 @@ func TestSortRegions(t *testing.T) {
        t.Errorf("got %v; want %v", got, want)
    }
}

func TestNoCaptivePortalWhenUDP(t *testing.T) {
    // Override noRedirectClient to handle the /generate_204 endpoint
    var generate204Called atomic.Bool
    tr := RoundTripFunc(func(req *http.Request) *http.Response {
        if !strings.HasSuffix(req.URL.String(), "/generate_204") {
            panic("bad URL: " + req.URL.String())
        }
        generate204Called.Store(true)
        return &http.Response{
            StatusCode: http.StatusNoContent,
            Header:     make(http.Header),
        }
    })

    oldTransport := noRedirectClient.Transport
    t.Cleanup(func() { noRedirectClient.Transport = oldTransport })
    noRedirectClient.Transport = tr

    stunAddr, cleanup := stuntest.Serve(t)
    defer cleanup()

    c := &Client{
        Logf:              t.Logf,
        UDPBindAddr:       "127.0.0.1:0",
        testEnoughRegions: 1,

        // Set the delay long enough that we have time to cancel it
        // when our STUN probe succeeds.
        testCaptivePortalDelay: 10 * time.Second,
    }

    ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
    defer cancel()

    r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()))
    if err != nil {
        t.Fatal(err)
    }

    // Should not have called our captive portal function.
    if generate204Called.Load() {
        t.Errorf("captive portal check called; expected no call")
    }
    if r.CaptivePortal != "" {
        t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal)
    }
}

type RoundTripFunc func(req *http.Request) *http.Response

func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
    return f(req), nil
}

@@ -810,6 +810,7 @@ func (n *testNode) StartDaemonAsIPNGOOS(ipnGOOS string) *Daemon {
        "HTTPS_PROXY="+n.env.TrafficTrapServer.URL,
        "TS_DEBUG_TAILSCALED_IPN_GOOS="+ipnGOOS,
        "TS_LOGS_DIR="+t.TempDir(),
        "TS_NETCHECK_GENERATE_204_URL="+n.env.ControlServer.URL+"/generate_204",
    )
    cmd.Stderr = &nodeOutputParser{n: n}
    if *verboseTailscaled {

@@ -200,6 +200,9 @@ func (s *Server) logf(format string, a ...any) {
func (s *Server) initMux() {
    s.mux = http.NewServeMux()
    s.mux.HandleFunc("/", s.serveUnhandled)
    s.mux.HandleFunc("/generate_204", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusNoContent)
    })
    s.mux.HandleFunc("/key", s.serveKey)
    s.mux.HandleFunc("/machine/", s.serveMachine)
}