wgengine/magicsock: update discokeys on netmap change.

Fixes #3008.

Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
David Anderson 2021-10-06 10:18:12 -07:00 committed by Dave Anderson
parent 2d11503cff
commit 830f641c6b
2 changed files with 122 additions and 30 deletions

View File

@ -2072,6 +2072,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) {
}
if ep, ok := c.peerMap.endpointForDiscoKey(n.DiscoKey); ok && ep.publicKey == n.Key {
ep.updateFromNode(n)
c.peerMap.upsertDiscoEndpoint(ep) // maybe update discokey mappings in peerMap
} else if ep != nil {
// Endpoint no longer belongs to the same node. We'll
// create the new endpoint below.
@ -2095,6 +2096,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) {
for _, n := range nm.Peers {
if ep, ok := c.peerMap.endpointForNodeKey(n.Key); ok {
ep.updateFromNode(n)
c.peerMap.upsertDiscoEndpoint(ep) // maybe update discokey mappings in peerMap
continue
}
@ -2156,6 +2158,13 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) {
}
})
}
// discokeys might have changed in the above. Discard unused cached keys.
for discoKey := range c.sharedDiscoKey {
if _, ok := c.peerMap.endpointForDiscoKey(discoKey); !ok {
delete(c.sharedDiscoKey, discoKey)
}
}
}
func (c *Conn) wantDerpLocked() bool { return c.derpMap != nil }
@ -3415,6 +3424,12 @@ func (de *endpoint) updateFromNode(n *tailcfg.Node) {
de.mu.Lock()
defer de.mu.Unlock()
if de.discoKey != n.DiscoKey {
de.c.logf("[v1] magicsock: disco: node %s changed from discokey %s to %s", de.publicKey.ShortString(), de.discoKey, n.DiscoKey)
de.discoKey = n.DiscoKey
de.discoShort = de.discoKey.ShortString()
de.resetLocked()
}
if n.DERP == "" {
de.derpAddr = netaddr.IPPort{}
} else {
@ -3702,8 +3717,18 @@ func (de *endpoint) stopAndReset() {
de.c.logf("[v1] magicsock: doing cleanup for discovery key %x", de.discoKey[:])
// Zero these fields so if the user re-starts the network, the discovery
// state isn't a mix of before & after two sessions.
de.resetLocked()
if de.heartBeatTimer != nil {
de.heartBeatTimer.Stop()
de.heartBeatTimer = nil
}
de.pendingCLIPings = nil
}
// resetLocked clears all the endpoint's p2p state, reverting it to a
// DERP-only endpoint. It does not stop the endpoint's heartbeat
// timer, if one is running.
func (de *endpoint) resetLocked() {
de.lastSend = 0
de.lastFullPing = 0
de.bestAddr = addrLatency{}
@ -3712,15 +3737,9 @@ func (de *endpoint) stopAndReset() {
for _, es := range de.endpointState {
es.lastPing = 0
}
for txid, sp := range de.sentPing {
de.removeSentPingLocked(txid, sp)
}
if de.heartBeatTimer != nil {
de.heartBeatTimer.Stop()
de.heartBeatTimer = nil
}
de.pendingCLIPings = nil
}
func (de *endpoint) numStopAndReset() int64 {

View File

@ -136,13 +136,17 @@ type magicStack struct {
// friends. You need to call conn.SetNetworkMap and dev.Reconfig
// before anything interesting happens.
func newMagicStack(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap) *magicStack {
t.Helper()
privateKey, err := wgkey.NewPrivate()
if err != nil {
t.Fatalf("generating private key: %v", err)
}
return newMagicStackWithKey(t, logf, l, derpMap, privateKey)
}
func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap, privateKey wgkey.Private) *magicStack {
t.Helper()
epCh := make(chan []tailcfg.Endpoint, 100) // arbitrary
conn, err := NewConn(Options{
Logf: logf,
@ -647,6 +651,76 @@ func TestNoDiscoKey(t *testing.T) {
}
}
func TestDiscokeyChange(t *testing.T) {
tstest.PanicOnLog()
tstest.ResourceCheck(t)
derpMap, cleanup := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1))
defer cleanup()
m1Key, err := wgkey.NewPrivate()
if err != nil {
t.Fatalf("generating nodekey: %v", err)
}
m1 := newMagicStackWithKey(t, t.Logf, localhostListener{}, derpMap, m1Key)
defer m1.Close()
m2 := newMagicStack(t, t.Logf, localhostListener{}, derpMap)
defer m2.Close()
var (
mu sync.Mutex
// Start with some random discoKey that isn't actually m1's key,
// to simulate m2 coming up with knowledge of an old, expired
// discokey. We'll switch to the correct one later in the test.
m1DiscoKey = tailcfg.DiscoKey(key.NewPrivate().Public())
)
setm1Key := func(idx int, nm *netmap.NetworkMap) {
if idx != 1 {
// only mutate m2's netmap
return
}
if len(nm.Peers) != 1 {
// m1 not in netmap yet.
return
}
mu.Lock()
defer mu.Unlock()
nm.Peers[0].DiscoKey = m1DiscoKey
}
cleanupMesh := meshStacks(t.Logf, setm1Key, m1, m2)
defer cleanupMesh()
// Wait for both peers to know about each other.
for {
if s1 := m1.Status(); len(s1.Peer) != 1 {
time.Sleep(10 * time.Millisecond)
continue
}
if s2 := m2.Status(); len(s2.Peer) != 1 {
time.Sleep(10 * time.Millisecond)
continue
}
break
}
mu.Lock()
m1DiscoKey = m1.conn.DiscoPublicKey()
mu.Unlock()
// Manually trigger an endpoint update to meshStacks, so it hands
// m2 a new netmap.
m1.conn.mu.Lock()
m1.epCh <- m1.conn.lastEndpoints
m1.conn.mu.Unlock()
cleanup = newPinger(t, t.Logf, m1, m2)
defer cleanup()
mustDirect(t, t.Logf, m1, m2)
mustDirect(t, t.Logf, m2, m1)
}
func TestActiveDiscovery(t *testing.T) {
t.Run("simple_internet", func(t *testing.T) {
t.Parallel()
@ -878,30 +952,29 @@ func testActiveDiscovery(t *testing.T, d *devices) {
// Everything is now up and running, active discovery should find
// a direct path between our peers. Wait for it to switch away
// from DERP.
mustDirect := func(m1, m2 *magicStack) {
lastLog := time.Now().Add(-time.Minute)
// See https://github.com/tailscale/tailscale/issues/654 for a discussion of this deadline.
for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
pst := m1.Status().Peer[m2.Public()]
if pst.CurAddr != "" {
logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr)
return
}
if now := time.Now(); now.Sub(lastLog) > time.Second {
logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs)
lastLog = now
}
}
t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2)
}
mustDirect(m1, m2)
mustDirect(m2, m1)
mustDirect(t, logf, m1, m2)
mustDirect(t, logf, m2, m1)
logf("starting cleanup")
}
func mustDirect(t *testing.T, logf logger.Logf, m1, m2 *magicStack) {
lastLog := time.Now().Add(-time.Minute)
// See https://github.com/tailscale/tailscale/issues/654 for a discussion of this deadline.
for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
pst := m1.Status().Peer[m2.Public()]
if pst.CurAddr != "" {
logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr)
return
}
if now := time.Now(); now.Sub(lastLog) > time.Second {
logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs)
lastLog = now
}
}
t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2)
}
func testTwoDevicePing(t *testing.T, d *devices) {
tstest.PanicOnLog()
tstest.ResourceCheck(t)