wgengine/magicsock: don't change DERP home when not connected to control
This pretty much always results in an outage because peers won't discover our new home region and thus won't be able to establish connectivity. Updates tailscale/corp#18095 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: Ic0d09133f198b528dd40c6383b16d7663d9d37a7
This commit is contained in:
parent
54e52532eb
commit
f072d017bd
|
@ -23,6 +23,7 @@ import (
|
|||
"tailscale.com/health"
|
||||
"tailscale.com/logtail/backoff"
|
||||
"tailscale.com/net/dnscache"
|
||||
"tailscale.com/net/netcheck"
|
||||
"tailscale.com/net/tsaddr"
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/tailcfg"
|
||||
|
@ -31,6 +32,7 @@ import (
|
|||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/mak"
|
||||
"tailscale.com/util/sysresources"
|
||||
"tailscale.com/util/testenv"
|
||||
)
|
||||
|
||||
// useDerpRoute reports whether magicsock should enable the DERP
|
||||
|
@ -85,7 +87,10 @@ type activeDerp struct {
|
|||
createTime time.Time
|
||||
}
|
||||
|
||||
var processStartUnixNano = time.Now().UnixNano()
|
||||
var (
|
||||
processStartUnixNano = time.Now().UnixNano()
|
||||
pickDERPFallbackForTests func() int
|
||||
)
|
||||
|
||||
// pickDERPFallback returns a non-zero but deterministic DERP node to
|
||||
// connect to. This is only used if netcheck couldn't find the
|
||||
|
@ -121,11 +126,59 @@ func (c *Conn) pickDERPFallback() int {
|
|||
return c.myDerp
|
||||
}
|
||||
|
||||
if pickDERPFallbackForTests != nil {
|
||||
return pickDERPFallbackForTests()
|
||||
}
|
||||
|
||||
h := fnv.New64()
|
||||
fmt.Fprintf(h, "%p/%d", c, processStartUnixNano) // arbitrary
|
||||
return ids[rand.New(rand.NewSource(int64(h.Sum64()))).Intn(len(ids))]
|
||||
}
|
||||
|
||||
// This allows existing tests to pass, but allows us to still test the
|
||||
// behaviour during tests.
|
||||
var checkControlHealthDuringNearestDERPInTests = false
|
||||
|
||||
// maybeSetNearestDERP selects and changes the nearest/preferred DERP server
|
||||
// based on the netcheck report and other heuristics. It returns the DERP
|
||||
// region that it selected and set (via setNearestDERP).
|
||||
//
|
||||
// c.mu must NOT be held.
|
||||
func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int) {
|
||||
// Don't change our PreferredDERP if we don't have a connection to
|
||||
// control; if we don't, then we can't inform peers about a DERP home
|
||||
// change, which breaks all connectivity. Even if this DERP region is
|
||||
// down, changing our home DERP isn't correct since peers can't
|
||||
// discover that change.
|
||||
//
|
||||
// See https://github.com/tailscale/corp/issues/18095
|
||||
//
|
||||
// For tests, always assume we're connected to control unless we're
|
||||
// explicitly testing this behaviour.
|
||||
var connectedToControl bool
|
||||
if testenv.InTest() && !checkControlHealthDuringNearestDERPInTests {
|
||||
connectedToControl = true
|
||||
} else {
|
||||
connectedToControl = health.GetInPollNetMap()
|
||||
}
|
||||
if !connectedToControl {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return c.myDerp
|
||||
}
|
||||
|
||||
preferredDERP = report.PreferredDERP
|
||||
if preferredDERP == 0 {
|
||||
// Perhaps UDP is blocked. Pick a deterministic but arbitrary
|
||||
// one.
|
||||
preferredDERP = c.pickDERPFallback()
|
||||
}
|
||||
if !c.setNearestDERP(preferredDERP) {
|
||||
preferredDERP = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (c *Conn) derpRegionCodeLocked(regionID int) string {
|
||||
if c.derpMap == nil {
|
||||
return ""
|
||||
|
|
|
@ -682,16 +682,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) {
|
|||
ni.OSHasIPv6.Set(report.OSHasIPv6)
|
||||
ni.WorkingUDP.Set(report.UDP)
|
||||
ni.WorkingICMPv4.Set(report.ICMPv4)
|
||||
ni.PreferredDERP = report.PreferredDERP
|
||||
|
||||
if ni.PreferredDERP == 0 {
|
||||
// Perhaps UDP is blocked. Pick a deterministic but arbitrary
|
||||
// one.
|
||||
ni.PreferredDERP = c.pickDERPFallback()
|
||||
}
|
||||
if !c.setNearestDERP(ni.PreferredDERP) {
|
||||
ni.PreferredDERP = 0
|
||||
}
|
||||
ni.PreferredDERP = c.maybeSetNearestDERP(report)
|
||||
ni.FirewallMode = hostinfo.FirewallMode()
|
||||
|
||||
c.callNetInfoCallback(ni)
|
||||
|
|
|
@ -41,9 +41,11 @@ import (
|
|||
"tailscale.com/derp/derphttp"
|
||||
"tailscale.com/disco"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/ipn/ipnstate"
|
||||
"tailscale.com/net/connstats"
|
||||
"tailscale.com/net/netaddr"
|
||||
"tailscale.com/net/netcheck"
|
||||
"tailscale.com/net/packet"
|
||||
"tailscale.com/net/ping"
|
||||
"tailscale.com/net/stun/stuntest"
|
||||
|
@ -3017,3 +3019,118 @@ func TestAddrForPingSizeLocked(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaybeSetNearestDERP(t *testing.T) {
|
||||
derpMap := &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
1: {
|
||||
RegionID: 1,
|
||||
RegionCode: "test",
|
||||
Nodes: []*tailcfg.DERPNode{
|
||||
{
|
||||
Name: "t1",
|
||||
RegionID: 1,
|
||||
HostName: "test-node.unused",
|
||||
IPv4: "127.0.0.1",
|
||||
IPv6: "none",
|
||||
},
|
||||
},
|
||||
},
|
||||
21: {
|
||||
RegionID: 21,
|
||||
RegionCode: "tor",
|
||||
Nodes: []*tailcfg.DERPNode{
|
||||
{
|
||||
Name: "21b",
|
||||
RegionID: 21,
|
||||
HostName: "tor.test-node.unused",
|
||||
IPv4: "127.0.0.1",
|
||||
IPv6: "none",
|
||||
},
|
||||
},
|
||||
},
|
||||
31: {
|
||||
RegionID: 31,
|
||||
RegionCode: "fallback",
|
||||
Nodes: []*tailcfg.DERPNode{
|
||||
{
|
||||
Name: "31b",
|
||||
RegionID: 31,
|
||||
HostName: "fallback.test-node.unused",
|
||||
IPv4: "127.0.0.1",
|
||||
IPv6: "none",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Ensure that our fallback code always picks a deterministic value.
|
||||
tstest.Replace(t, &pickDERPFallbackForTests, func() int { return 31 })
|
||||
|
||||
// Actually test this code path.
|
||||
tstest.Replace(t, &checkControlHealthDuringNearestDERPInTests, true)
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
old int
|
||||
reportDERP int
|
||||
connectedToControl bool
|
||||
want int
|
||||
}{
|
||||
{
|
||||
name: "connected_with_report_derp",
|
||||
old: 1,
|
||||
reportDERP: 21,
|
||||
connectedToControl: true,
|
||||
want: 21,
|
||||
},
|
||||
{
|
||||
name: "not_connected_with_report_derp",
|
||||
old: 1,
|
||||
reportDERP: 21,
|
||||
connectedToControl: false,
|
||||
want: 1, // no change
|
||||
},
|
||||
{
|
||||
name: "connected_no_derp",
|
||||
old: 1,
|
||||
reportDERP: 0,
|
||||
connectedToControl: true,
|
||||
want: 1, // no change
|
||||
},
|
||||
{
|
||||
name: "connected_no_derp_fallback",
|
||||
old: 0,
|
||||
reportDERP: 0,
|
||||
connectedToControl: true,
|
||||
want: 31, // deterministic fallback
|
||||
},
|
||||
}
|
||||
for _, tt := range testCases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
c := newConn()
|
||||
c.logf = t.Logf
|
||||
c.myDerp = tt.old
|
||||
c.derpMap = derpMap
|
||||
|
||||
report := &netcheck.Report{PreferredDERP: tt.reportDERP}
|
||||
|
||||
oldConnected := health.GetInPollNetMap()
|
||||
if tt.connectedToControl != oldConnected {
|
||||
if tt.connectedToControl {
|
||||
health.GotStreamedMapResponse()
|
||||
t.Cleanup(health.SetOutOfPollNetMap)
|
||||
} else {
|
||||
health.SetOutOfPollNetMap()
|
||||
t.Cleanup(health.GotStreamedMapResponse)
|
||||
}
|
||||
}
|
||||
|
||||
got := c.maybeSetNearestDERP(report)
|
||||
if got != tt.want {
|
||||
t.Errorf("got new DERP region %d, want %d", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue