wgengine,magicsock: fix two lazy wireguard config issues
1) we weren't waking up a discoEndpoint that once existed and went idle for 5 minutes and then got a disco message again. 2) userspaceEngine.noteReceiveActivity had a buggy check; fixed and added a test
This commit is contained in:
parent
78c2e1ff83
commit
9a346fd8b4
|
@ -1360,13 +1360,15 @@ func wgRecvAddr(e conn.Endpoint, ipp netaddr.IPPort, addr *net.UDPAddr) *net.UDP
|
|||
return ipp.UDPAddr()
|
||||
}
|
||||
|
||||
// noteRecvActivity calls the magicsock.Conn.noteRecvActivity hook if
|
||||
// e is a discovery-capable peer.
|
||||
// noteRecvActivityFromEndpoint calls the c.noteRecvActivity hook if
|
||||
// e is a discovery-capable peer and this is the first receive activity
|
||||
// it's got in awhile (in last 10 seconds).
|
||||
//
|
||||
// This should be called whenever a packet arrives from e.
|
||||
func noteRecvActivity(e conn.Endpoint) {
|
||||
if de, ok := e.(*discoEndpoint); ok {
|
||||
de.onRecvActivity()
|
||||
func (c *Conn) noteRecvActivityFromEndpoint(e conn.Endpoint) {
|
||||
de, ok := e.(*discoEndpoint)
|
||||
if ok && c.noteRecvActivity != nil && de.isFirstRecvActivityInAwhile() {
|
||||
c.noteRecvActivity(de.discoKey)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1377,7 +1379,7 @@ Top:
|
|||
c.bufferedIPv4From = netaddr.IPPort{}
|
||||
addr = from.UDPAddr()
|
||||
ep := c.findEndpoint(from, addr)
|
||||
noteRecvActivity(ep)
|
||||
c.noteRecvActivityFromEndpoint(ep)
|
||||
return copy(b, c.bufferedIPv4Packet), ep, wgRecvAddr(ep, from, addr), nil
|
||||
}
|
||||
|
||||
|
@ -1489,7 +1491,7 @@ Top:
|
|||
ep = c.findEndpoint(ipp, addr)
|
||||
}
|
||||
if !didNoteRecvActivity {
|
||||
noteRecvActivity(ep)
|
||||
c.noteRecvActivityFromEndpoint(ep)
|
||||
}
|
||||
return n, ep, wgRecvAddr(ep, ipp, addr), nil
|
||||
}
|
||||
|
@ -1517,7 +1519,7 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) {
|
|||
}
|
||||
|
||||
ep := c.findEndpoint(ipp, addr)
|
||||
noteRecvActivity(ep)
|
||||
c.noteRecvActivityFromEndpoint(ep)
|
||||
return n, ep, wgRecvAddr(ep, ipp, addr), nil
|
||||
}
|
||||
}
|
||||
|
@ -1615,8 +1617,9 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
de, ok := c.endpointOfDisco[sender]
|
||||
if !ok {
|
||||
needsRecvActivityCall := false
|
||||
de, endpointFound0 := c.endpointOfDisco[sender]
|
||||
if !endpointFound0 {
|
||||
// We don't have an active endpoint for this sender but we knew about the node, so
|
||||
// it's an idle endpoint that doesn't yet exist in the wireguard config. We now have
|
||||
// to notify the userspace engine (via noteRecvActivity) so wireguard-go can create
|
||||
|
@ -1628,6 +1631,11 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
|
|||
c.logf("magicsock: [unexpected] have node without endpoint, without c.noteRecvActivity hook")
|
||||
return false
|
||||
}
|
||||
needsRecvActivityCall = true
|
||||
} else {
|
||||
needsRecvActivityCall = de.isFirstRecvActivityInAwhile()
|
||||
}
|
||||
if needsRecvActivityCall && c.noteRecvActivity != nil {
|
||||
// We can't hold Conn.mu while calling noteRecvActivity.
|
||||
// noteRecvActivity acquires userspaceEngine.wgLock (and per our
|
||||
// lock ordering rules: wgLock must come first), and also calls
|
||||
|
@ -1651,7 +1659,9 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
|
|||
c.logf("magicsock: [unexpected] lazy endpoint not created for %v, %v", peerNode.Key.ShortString(), sender.ShortString())
|
||||
return false
|
||||
}
|
||||
c.logf("magicsock: lazy endpoint created via disco message for %v, %v", peerNode.Key.ShortString(), sender.ShortString())
|
||||
if !endpointFound0 {
|
||||
c.logf("magicsock: lazy endpoint created via disco message for %v, %v", peerNode.Key.ShortString(), sender.ShortString())
|
||||
}
|
||||
}
|
||||
|
||||
// First, do we even know (and thus care) about this sender? If not,
|
||||
|
@ -2676,17 +2686,6 @@ func (c *Conn) CreateEndpoint(pubKey [32]byte, addrs string) (conn.Endpoint, err
|
|||
sentPing: map[stun.TxID]sentPing{},
|
||||
endpointState: map[netaddr.IPPort]*endpointState{},
|
||||
}
|
||||
lastRecvTime := new(int64) // atomic
|
||||
de.onRecvActivity = func() {
|
||||
now := time.Now().Unix()
|
||||
old := atomic.LoadInt64(lastRecvTime)
|
||||
if old == 0 || old <= now-10 {
|
||||
atomic.StoreInt64(lastRecvTime, now)
|
||||
if c.noteRecvActivity != nil {
|
||||
c.noteRecvActivity(de.discoKey)
|
||||
}
|
||||
}
|
||||
}
|
||||
de.initFakeUDPAddr()
|
||||
de.updateFromNode(c.nodeOfDisco[de.discoKey])
|
||||
c.endpointOfDisco[de.discoKey] = de
|
||||
|
@ -2958,6 +2957,9 @@ func udpAddrDebugString(ua net.UDPAddr) string {
|
|||
// discoEndpoint is a wireguard/conn.Endpoint for new-style peers that
|
||||
// advertise a DiscoKey and participate in active discovery.
|
||||
type discoEndpoint struct {
|
||||
// atomically accessed; declared first for alignment reasons
|
||||
lastRecvUnixAtomic int64
|
||||
|
||||
// These fields are initialized once and never modified.
|
||||
c *Conn
|
||||
publicKey tailcfg.NodeKey // peer public key (for WireGuard + DERP)
|
||||
|
@ -2966,7 +2968,6 @@ type discoEndpoint struct {
|
|||
fakeWGAddr netaddr.IPPort // the UDP address we tell wireguard-go we're using
|
||||
fakeWGAddrStd *net.UDPAddr // the *net.UDPAddr form of fakeWGAddr
|
||||
wgEndpointHostPort string // string from CreateEndpoint: "<hex-discovery-key>.disco.tailscale:12345"
|
||||
onRecvActivity func()
|
||||
|
||||
// Owned by Conn.mu:
|
||||
lastPingFrom netaddr.IPPort
|
||||
|
@ -3063,6 +3064,19 @@ func (de *discoEndpoint) initFakeUDPAddr() {
|
|||
de.fakeWGAddrStd = de.fakeWGAddr.UDPAddr()
|
||||
}
|
||||
|
||||
// isFirstRecvActivityInAwhile notes that receive activity has occured for this
|
||||
// endpoint and reports whether it's been at least 10 seconds since the last
|
||||
// receive activity (including having never received from this peer before).
|
||||
func (de *discoEndpoint) isFirstRecvActivityInAwhile() bool {
|
||||
now := time.Now().Unix()
|
||||
old := atomic.LoadInt64(&de.lastRecvUnixAtomic)
|
||||
if old <= now-10 {
|
||||
atomic.StoreInt64(&de.lastRecvUnixAtomic, now)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// String exists purely so wireguard-go internals can log.Printf("%v")
|
||||
// its internal conn.Endpoints and we don't end up with data races
|
||||
// from fmt (via log) reading mutex fields and such.
|
||||
|
|
|
@ -22,6 +22,7 @@ import (
|
|||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/tailscale/wireguard-go/device"
|
||||
|
@ -1322,3 +1323,17 @@ func stringifyConfig(cfg wgcfg.Config) string {
|
|||
}
|
||||
return string(j)
|
||||
}
|
||||
|
||||
func TestDiscoEndpointAlignment(t *testing.T) {
|
||||
var de discoEndpoint
|
||||
off := unsafe.Offsetof(de.lastRecvUnixAtomic)
|
||||
if off%8 != 0 {
|
||||
t.Fatalf("lastRecvUnixAtomic is not 8-byte aligned")
|
||||
}
|
||||
if !de.isFirstRecvActivityInAwhile() { // verify this doesn't panic on 32-bit
|
||||
t.Error("expected true")
|
||||
}
|
||||
if de.isFirstRecvActivityInAwhile() {
|
||||
t.Error("expected false on second call")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,7 @@ type userspaceEngine struct {
|
|||
logf logger.Logf
|
||||
reqCh chan struct{}
|
||||
waitCh chan struct{} // chan is closed when first Close call completes; contrast with closing bool
|
||||
timeNow func() time.Time
|
||||
tundev *tstun.TUN
|
||||
wgdev *device.Device
|
||||
router router.Router
|
||||
|
@ -94,6 +95,8 @@ type userspaceEngine struct {
|
|||
magicConn *magicsock.Conn
|
||||
linkMon *monitor.Mon
|
||||
|
||||
testMaybeReconfigHook func() // for tests; if non-nil, fires if maybeReconfigWireguardLocked called
|
||||
|
||||
// localAddrs is the set of IP addresses assigned to the local
|
||||
// tunnel interface. It's used to reflect local packets
|
||||
// incorrectly sent to us.
|
||||
|
@ -199,6 +202,7 @@ func newUserspaceEngineAdvanced(conf EngineConfig) (_ Engine, reterr error) {
|
|||
logf := conf.Logf
|
||||
|
||||
e := &userspaceEngine{
|
||||
timeNow: time.Now,
|
||||
logf: logf,
|
||||
reqCh: make(chan struct{}, 1),
|
||||
waitCh: make(chan struct{}),
|
||||
|
@ -622,12 +626,12 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
|
|||
e.wgLock.Lock()
|
||||
defer e.wgLock.Unlock()
|
||||
|
||||
now := time.Now()
|
||||
was, ok := e.recvActivityAt[dk]
|
||||
if !ok {
|
||||
// Not a trimmable peer we care about tracking. (See isTrimmablePeer)
|
||||
return
|
||||
}
|
||||
now := e.timeNow()
|
||||
e.recvActivityAt[dk] = now
|
||||
|
||||
// If the last activity time jumped a bunch (say, at least
|
||||
|
@ -636,7 +640,7 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
|
|||
// lazyPeerIdleThreshold without the divide by 2, but
|
||||
// maybeReconfigWireguardLocked is cheap enough to call every
|
||||
// couple minutes (just not on every packet).
|
||||
if was.IsZero() || now.Sub(was) < -lazyPeerIdleThreshold/2 {
|
||||
if was.IsZero() || now.Sub(was) > lazyPeerIdleThreshold/2 {
|
||||
e.maybeReconfigWireguardLocked()
|
||||
}
|
||||
}
|
||||
|
@ -677,6 +681,11 @@ func discoKeyFromPeer(p *wgcfg.Peer) tailcfg.DiscoKey {
|
|||
|
||||
// e.wgLock must be held.
|
||||
func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
|
||||
if hook := e.testMaybeReconfigHook; hook != nil {
|
||||
hook()
|
||||
return nil
|
||||
}
|
||||
|
||||
full := e.lastCfgFull
|
||||
|
||||
// Compute a minimal config to pass to wireguard-go
|
||||
|
@ -689,7 +698,7 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
|
|||
// the past 5 minutes. That's more than WireGuard's key
|
||||
// rotation time anyway so it's no harm if we remove it
|
||||
// later if it's been inactive.
|
||||
activeCutoff := time.Now().Add(-lazyPeerIdleThreshold)
|
||||
activeCutoff := e.timeNow().Add(-lazyPeerIdleThreshold)
|
||||
|
||||
// Not all peers can be trimmed from the network map (see
|
||||
// isTrimmablePeer). For those are are trimmable, keep track
|
||||
|
@ -765,7 +774,7 @@ func (e *userspaceEngine) updateActivityMapsLocked(trackDisco []tailcfg.DiscoKey
|
|||
if fn == nil {
|
||||
// This is the func that gets run on every outgoing packet for tracked IPs:
|
||||
fn = func() {
|
||||
now := time.Now().Unix()
|
||||
now := e.timeNow().Unix()
|
||||
old := atomic.LoadInt64(timePtr)
|
||||
|
||||
// How long's it been since we last sent a packet?
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package wgengine
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/wgengine/tstun"
|
||||
)
|
||||
|
||||
func TestNoteReceiveActivity(t *testing.T) {
|
||||
now := time.Unix(1, 0)
|
||||
tick := func(d time.Duration) { now = now.Add(d) }
|
||||
var logBuf bytes.Buffer
|
||||
|
||||
confc := make(chan bool, 1)
|
||||
gotConf := func() bool {
|
||||
select {
|
||||
case <-confc:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
e := &userspaceEngine{
|
||||
timeNow: func() time.Time { return now },
|
||||
recvActivityAt: map[tailcfg.DiscoKey]time.Time{},
|
||||
logf: func(format string, a ...interface{}) {
|
||||
fmt.Fprintf(&logBuf, format, a...)
|
||||
},
|
||||
tundev: new(tstun.TUN),
|
||||
testMaybeReconfigHook: func() { confc <- true },
|
||||
}
|
||||
ra := e.recvActivityAt
|
||||
|
||||
dk := tailcfg.DiscoKey(key.NewPrivate().Public())
|
||||
|
||||
// Activity on an untracked key should do nothing.
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 0 {
|
||||
t.Fatalf("unexpected growth in map: now has %d keys; want 0", len(ra))
|
||||
}
|
||||
if logBuf.Len() != 0 {
|
||||
t.Fatalf("unexpected log write (and thus activity): %s", logBuf.Bytes())
|
||||
}
|
||||
|
||||
// Now track it and expect updates.
|
||||
ra[dk] = time.Time{}
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 1 {
|
||||
t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(ra))
|
||||
}
|
||||
if got := ra[dk]; got != now {
|
||||
t.Fatalf("time in map = %v; want %v", got, now)
|
||||
}
|
||||
if !gotConf() {
|
||||
t.Fatalf("didn't get expected reconfig")
|
||||
}
|
||||
|
||||
// With updates 1 second apart, don't expect a reconfig.
|
||||
for i := 0; i < 300; i++ {
|
||||
tick(time.Second)
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 1 {
|
||||
t.Fatalf("map len = %d; want 1", len(ra))
|
||||
}
|
||||
if got := ra[dk]; got != now {
|
||||
t.Fatalf("time in map = %v; want %v", got, now)
|
||||
}
|
||||
if gotConf() {
|
||||
t.Fatalf("unexpected reconfig")
|
||||
}
|
||||
}
|
||||
|
||||
// But if there's a big jump it should get an update.
|
||||
tick(3 * time.Minute)
|
||||
e.noteReceiveActivity(dk)
|
||||
if !gotConf() {
|
||||
t.Fatalf("expected config")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue