2020-02-05 22:16:58 +00:00
|
|
|
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package magicsock
|
|
|
|
|
|
|
|
import (
|
2020-03-03 15:39:40 +00:00
|
|
|
"bytes"
|
2020-07-25 01:32:18 +01:00
|
|
|
"context"
|
2020-03-03 21:50:47 +00:00
|
|
|
crand "crypto/rand"
|
|
|
|
"crypto/tls"
|
2021-02-10 22:47:26 +00:00
|
|
|
"errors"
|
2020-02-05 22:16:58 +00:00
|
|
|
"fmt"
|
2020-07-04 06:26:53 +01:00
|
|
|
"io/ioutil"
|
2020-02-05 22:16:58 +00:00
|
|
|
"net"
|
2020-03-03 21:50:47 +00:00
|
|
|
"net/http"
|
|
|
|
"net/http/httptest"
|
2020-03-06 21:35:59 +00:00
|
|
|
"os"
|
2021-02-07 05:27:02 +00:00
|
|
|
"runtime"
|
2020-08-06 18:23:16 +01:00
|
|
|
"strconv"
|
2020-02-05 22:16:58 +00:00
|
|
|
"strings"
|
2020-05-14 04:44:58 +01:00
|
|
|
"sync"
|
2020-02-05 22:16:58 +00:00
|
|
|
"testing"
|
|
|
|
"time"
|
2020-08-06 22:57:03 +01:00
|
|
|
"unsafe"
|
2020-03-03 11:51:31 +00:00
|
|
|
|
2020-06-26 22:38:53 +01:00
|
|
|
"golang.org/x/crypto/nacl/box"
|
2021-05-25 20:42:22 +01:00
|
|
|
"golang.zx2c4.com/wireguard/device"
|
|
|
|
"golang.zx2c4.com/wireguard/tun/tuntest"
|
2020-06-30 20:22:42 +01:00
|
|
|
"inet.af/netaddr"
|
2020-03-03 21:50:47 +00:00
|
|
|
"tailscale.com/derp"
|
|
|
|
"tailscale.com/derp/derphttp"
|
2020-07-25 01:32:18 +01:00
|
|
|
"tailscale.com/ipn/ipnstate"
|
2020-05-25 17:15:50 +01:00
|
|
|
"tailscale.com/net/stun/stuntest"
|
2021-03-27 05:14:08 +00:00
|
|
|
"tailscale.com/net/tstun"
|
2020-05-17 17:51:38 +01:00
|
|
|
"tailscale.com/tailcfg"
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because they're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
"tailscale.com/tstest"
|
2020-07-10 22:26:04 +01:00
|
|
|
"tailscale.com/tstest/natlab"
|
2020-03-03 21:50:47 +00:00
|
|
|
"tailscale.com/types/key"
|
2020-03-07 01:50:36 +00:00
|
|
|
"tailscale.com/types/logger"
|
2021-02-05 23:44:46 +00:00
|
|
|
"tailscale.com/types/netmap"
|
2020-07-10 22:26:04 +01:00
|
|
|
"tailscale.com/types/nettype"
|
2020-12-30 01:22:56 +00:00
|
|
|
"tailscale.com/types/wgkey"
|
2021-02-10 19:49:30 +00:00
|
|
|
"tailscale.com/util/cibuild"
|
2021-09-16 00:43:44 +01:00
|
|
|
"tailscale.com/util/racebuild"
|
2020-05-13 14:16:17 +01:00
|
|
|
"tailscale.com/wgengine/filter"
|
2021-01-29 20:16:36 +00:00
|
|
|
"tailscale.com/wgengine/wgcfg"
|
2021-02-05 20:44:43 +00:00
|
|
|
"tailscale.com/wgengine/wgcfg/nmcfg"
|
2021-01-21 20:33:54 +00:00
|
|
|
"tailscale.com/wgengine/wglog"
|
2020-02-05 22:16:58 +00:00
|
|
|
)
|
|
|
|
|
2020-10-28 15:23:12 +00:00
|
|
|
func init() {
	// Flag the process environment as running under test by setting
	// IN_TS_TEST=1 during package initialization.
	const (
		testEnvVar = "IN_TS_TEST"
		testEnvVal = "1"
	)
	os.Setenv(testEnvVar, testEnvVal)
}
|
|
|
|
|
2020-05-14 18:01:48 +01:00
|
|
|
// WaitReady waits until the magicsock is entirely initialized and connected
|
|
|
|
// to its home DERP server. This is normally not necessary, since magicsock
|
|
|
|
// is intended to be entirely asynchronous, but it helps eliminate race
|
|
|
|
// conditions in tests. In particular, you can't expect two test magicsocks
|
|
|
|
// to be able to connect to each other through a test DERP unless they are
|
|
|
|
// both fully initialized before you try.
|
2020-12-03 04:12:14 +00:00
|
|
|
func (c *Conn) WaitReady(t testing.TB) {
|
2020-05-14 18:01:48 +01:00
|
|
|
t.Helper()
|
|
|
|
timer := time.NewTimer(10 * time.Second)
|
|
|
|
defer timer.Stop()
|
|
|
|
select {
|
|
|
|
case <-c.derpStarted:
|
|
|
|
return
|
|
|
|
case <-c.connCtx.Done():
|
|
|
|
t.Fatalf("magicsock.Conn closed while waiting for readiness")
|
|
|
|
case <-timer.C:
|
|
|
|
t.Fatalf("timeout waiting for readiness")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-24 22:19:20 +01:00
|
|
|
func runDERPAndStun(t *testing.T, logf logger.Logf, l nettype.PacketListener, stunIP netaddr.IP) (derpMap *tailcfg.DERPMap, cleanup func()) {
|
|
|
|
var serverPrivateKey key.Private
|
|
|
|
if _, err := crand.Read(serverPrivateKey[:]); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
d := derp.NewServer(serverPrivateKey, logf)
|
|
|
|
|
|
|
|
httpsrv := httptest.NewUnstartedServer(derphttp.Handler(d))
|
|
|
|
httpsrv.Config.ErrorLog = logger.StdLogger(logf)
|
|
|
|
httpsrv.Config.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler))
|
|
|
|
httpsrv.StartTLS()
|
|
|
|
|
|
|
|
stunAddr, stunCleanup := stuntest.ServeWithPacketListener(t, l)
|
|
|
|
|
|
|
|
m := &tailcfg.DERPMap{
|
|
|
|
Regions: map[int]*tailcfg.DERPRegion{
|
2021-04-01 17:54:54 +01:00
|
|
|
1: {
|
2020-07-24 22:19:20 +01:00
|
|
|
RegionID: 1,
|
|
|
|
RegionCode: "test",
|
|
|
|
Nodes: []*tailcfg.DERPNode{
|
|
|
|
{
|
2021-07-09 19:16:43 +01:00
|
|
|
Name: "t1",
|
|
|
|
RegionID: 1,
|
|
|
|
HostName: "test-node.unused",
|
|
|
|
IPv4: "127.0.0.1",
|
|
|
|
IPv6: "none",
|
|
|
|
STUNPort: stunAddr.Port,
|
|
|
|
DERPPort: httpsrv.Listener.Addr().(*net.TCPAddr).Port,
|
|
|
|
InsecureForTests: true,
|
|
|
|
STUNTestIP: stunIP.String(),
|
2020-07-24 22:19:20 +01:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup = func() {
|
|
|
|
httpsrv.CloseClientConnections()
|
|
|
|
httpsrv.Close()
|
|
|
|
d.Close()
|
|
|
|
stunCleanup()
|
|
|
|
}
|
|
|
|
|
|
|
|
return m, cleanup
|
|
|
|
}
|
|
|
|
|
|
|
|
// magicStack is a magicsock, plus all the stuff around it that's
|
|
|
|
// necessary to send and receive packets to test e2e wireguard
|
|
|
|
// happiness.
|
|
|
|
type magicStack struct {
|
2020-12-30 01:22:56 +00:00
|
|
|
privateKey wgkey.Private
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
epCh chan []tailcfg.Endpoint // endpoint updates produced by this peer
|
|
|
|
conn *Conn // the magicsock itself
|
|
|
|
tun *tuntest.ChannelTUN // TUN device to send/receive packets
|
|
|
|
tsTun *tstun.Wrapper // wrapped tun that implements filtering and wgengine hooks
|
|
|
|
dev *device.Device // the wireguard-go Device that connects the previous things
|
|
|
|
wgLogger *wglog.Logger // wireguard-go log wrapper
|
2020-07-24 22:19:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// newMagicStack builds and initializes an idle magicsock and
|
|
|
|
// friends. You need to call conn.SetNetworkMap and dev.Reconfig
|
|
|
|
// before anything interesting happens.
|
2021-08-26 03:39:20 +01:00
|
|
|
func newMagicStack(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap) *magicStack {
|
2020-12-30 01:22:56 +00:00
|
|
|
privateKey, err := wgkey.NewPrivate()
|
2020-07-24 22:19:20 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("generating private key: %v", err)
|
|
|
|
}
|
|
|
|
|
2021-10-06 18:18:12 +01:00
|
|
|
return newMagicStackWithKey(t, logf, l, derpMap, privateKey)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap, privateKey wgkey.Private) *magicStack {
|
|
|
|
t.Helper()
|
|
|
|
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
epCh := make(chan []tailcfg.Endpoint, 100) // arbitrary
|
2020-07-24 22:19:20 +01:00
|
|
|
conn, err := NewConn(Options{
|
2021-08-26 06:26:25 +01:00
|
|
|
Logf: logf,
|
|
|
|
TestOnlyPacketListener: l,
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
EndpointsFunc: func(eps []tailcfg.Endpoint) {
|
2020-07-24 22:19:20 +01:00
|
|
|
epCh <- eps
|
|
|
|
},
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("constructing magicsock: %v", err)
|
|
|
|
}
|
|
|
|
conn.SetDERPMap(derpMap)
|
|
|
|
if err := conn.SetPrivateKey(privateKey); err != nil {
|
|
|
|
t.Fatalf("setting private key in magicsock: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
tun := tuntest.NewChannelTUN()
|
2021-03-27 06:13:20 +00:00
|
|
|
tsTun := tstun.Wrap(logf, tun.TUN())
|
2020-11-10 06:02:03 +00:00
|
|
|
tsTun.SetFilter(filter.NewAllowAllForTest(logf))
|
2020-07-24 22:19:20 +01:00
|
|
|
|
2021-01-21 20:33:54 +00:00
|
|
|
wgLogger := wglog.NewLogger(logf)
|
2021-05-11 23:24:37 +01:00
|
|
|
dev := device.NewDevice(tsTun, conn.Bind(), wgLogger.DeviceLogger)
|
2020-07-24 22:19:20 +01:00
|
|
|
dev.Up()
|
|
|
|
|
|
|
|
// Wait for magicsock to connect up to DERP.
|
|
|
|
conn.WaitReady(t)
|
|
|
|
|
|
|
|
// Wait for first endpoint update to be available
|
|
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
|
|
for len(epCh) == 0 && time.Now().Before(deadline) {
|
2020-07-28 18:04:09 +01:00
|
|
|
time.Sleep(100 * time.Millisecond)
|
2020-07-24 22:19:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return &magicStack{
|
|
|
|
privateKey: privateKey,
|
|
|
|
epCh: epCh,
|
|
|
|
conn: conn,
|
|
|
|
tun: tun,
|
|
|
|
tsTun: tsTun,
|
|
|
|
dev: dev,
|
2021-01-21 20:33:54 +00:00
|
|
|
wgLogger: wgLogger,
|
2020-07-24 22:19:20 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-21 20:33:54 +00:00
|
|
|
func (s *magicStack) Reconfig(cfg *wgcfg.Config) error {
|
|
|
|
s.wgLogger.SetPeers(cfg.Peers)
|
2021-01-29 20:16:36 +00:00
|
|
|
return wgcfg.ReconfigDevice(s.dev, cfg, s.conn.logf)
|
2021-01-21 20:33:54 +00:00
|
|
|
}
|
|
|
|
|
2020-07-27 16:09:54 +01:00
|
|
|
func (s *magicStack) String() string {
|
|
|
|
pub := s.Public()
|
|
|
|
return pub.ShortString()
|
|
|
|
}
|
|
|
|
|
2020-07-24 22:19:20 +01:00
|
|
|
func (s *magicStack) Close() {
|
|
|
|
s.dev.Close()
|
|
|
|
s.conn.Close()
|
|
|
|
}
|
|
|
|
|
2020-07-27 16:09:54 +01:00
|
|
|
func (s *magicStack) Public() key.Public {
|
|
|
|
return key.Public(s.privateKey.Public())
|
|
|
|
}
|
|
|
|
|
2020-07-25 01:32:18 +01:00
|
|
|
func (s *magicStack) Status() *ipnstate.Status {
|
|
|
|
var sb ipnstate.StatusBuilder
|
|
|
|
s.conn.UpdateStatus(&sb)
|
|
|
|
return sb.Status()
|
|
|
|
}
|
|
|
|
|
2020-07-27 21:25:25 +01:00
|
|
|
// IP returns the Tailscale IP address assigned to this magicStack.
|
|
|
|
//
|
|
|
|
// Something external needs to provide a NetworkMap and WireGuard
|
|
|
|
// configs to the magicStack in order for it to acquire an IP
|
|
|
|
// address. See meshStacks for one possible source of netmaps and IPs.
|
2021-03-29 21:50:44 +01:00
|
|
|
func (s *magicStack) IP() netaddr.IP {
|
2020-07-27 21:25:25 +01:00
|
|
|
for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
|
|
|
|
st := s.Status()
|
|
|
|
if len(st.TailscaleIPs) > 0 {
|
|
|
|
return st.TailscaleIPs[0]
|
|
|
|
}
|
2020-07-25 01:32:18 +01:00
|
|
|
}
|
2021-03-29 21:50:44 +01:00
|
|
|
panic("timed out waiting for magicstack to get an IP assigned")
|
2020-07-25 01:32:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// meshStacks monitors epCh on all given ms, and plumbs network maps
|
|
|
|
// and WireGuard configs into everyone to form a full mesh that has up
|
|
|
|
// to date endpoint info. Think of it as an extremely stripped down
|
|
|
|
// and purpose-built Tailscale control plane.
|
2021-08-26 03:39:20 +01:00
|
|
|
func meshStacks(logf logger.Logf, mutateNetmap func(idx int, nm *netmap.NetworkMap), ms ...*magicStack) (cleanup func()) {
|
2020-07-25 01:32:18 +01:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
|
|
|
|
// Serialize all reconfigurations globally, just to keep things
|
|
|
|
// simpler.
|
|
|
|
var (
|
|
|
|
mu sync.Mutex
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
eps = make([][]tailcfg.Endpoint, len(ms))
|
2020-07-25 01:32:18 +01:00
|
|
|
)
|
|
|
|
|
2021-02-05 23:44:46 +00:00
|
|
|
buildNetmapLocked := func(myIdx int) *netmap.NetworkMap {
|
2020-07-25 01:32:18 +01:00
|
|
|
me := ms[myIdx]
|
2021-02-05 23:44:46 +00:00
|
|
|
nm := &netmap.NetworkMap{
|
2020-07-25 01:32:18 +01:00
|
|
|
PrivateKey: me.privateKey,
|
|
|
|
NodeKey: tailcfg.NodeKey(me.privateKey.Public()),
|
2021-05-15 02:07:28 +01:00
|
|
|
Addresses: []netaddr.IPPrefix{netaddr.IPPrefixFrom(netaddr.IPv4(1, 0, 0, byte(myIdx+1)), 32)},
|
2020-07-25 01:32:18 +01:00
|
|
|
}
|
|
|
|
for i, peer := range ms {
|
|
|
|
if i == myIdx {
|
|
|
|
continue
|
|
|
|
}
|
2021-05-15 02:07:28 +01:00
|
|
|
addrs := []netaddr.IPPrefix{netaddr.IPPrefixFrom(netaddr.IPv4(1, 0, 0, byte(i+1)), 32)}
|
2020-07-25 01:32:18 +01:00
|
|
|
peer := &tailcfg.Node{
|
|
|
|
ID: tailcfg.NodeID(i + 1),
|
|
|
|
Name: fmt.Sprintf("node%d", i+1),
|
|
|
|
Key: tailcfg.NodeKey(peer.privateKey.Public()),
|
|
|
|
DiscoKey: peer.conn.DiscoPublicKey(),
|
|
|
|
Addresses: addrs,
|
|
|
|
AllowedIPs: addrs,
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
Endpoints: epStrings(eps[i]),
|
2020-07-25 01:32:18 +01:00
|
|
|
DERP: "127.3.3.40:1",
|
|
|
|
}
|
|
|
|
nm.Peers = append(nm.Peers, peer)
|
|
|
|
}
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
if mutateNetmap != nil {
|
|
|
|
mutateNetmap(myIdx, nm)
|
|
|
|
}
|
2020-07-25 01:32:18 +01:00
|
|
|
return nm
|
|
|
|
}
|
|
|
|
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
updateEps := func(idx int, newEps []tailcfg.Endpoint) {
|
2020-07-25 01:32:18 +01:00
|
|
|
mu.Lock()
|
|
|
|
defer mu.Unlock()
|
|
|
|
|
|
|
|
eps[idx] = newEps
|
|
|
|
|
|
|
|
for i, m := range ms {
|
2021-02-05 23:44:46 +00:00
|
|
|
nm := buildNetmapLocked(i)
|
|
|
|
m.conn.SetNetworkMap(nm)
|
|
|
|
peerSet := make(map[key.Public]struct{}, len(nm.Peers))
|
|
|
|
for _, peer := range nm.Peers {
|
2020-07-25 01:32:18 +01:00
|
|
|
peerSet[key.Public(peer.Key)] = struct{}{}
|
|
|
|
}
|
|
|
|
m.conn.UpdatePeers(peerSet)
|
2021-02-25 04:05:23 +00:00
|
|
|
wg, err := nmcfg.WGCfg(nm, logf, netmap.AllowSingleHosts, "")
|
2020-07-25 01:32:18 +01:00
|
|
|
if err != nil {
|
|
|
|
// We're too far from the *testing.T to be graceful,
|
|
|
|
// blow up. Shouldn't happen anyway.
|
|
|
|
panic(fmt.Sprintf("failed to construct wgcfg from netmap: %v", err))
|
|
|
|
}
|
2021-01-21 20:33:54 +00:00
|
|
|
if err := m.Reconfig(wg); err != nil {
|
2021-08-30 23:32:06 +01:00
|
|
|
if ctx.Err() != nil || errors.Is(err, errConnClosed) {
|
2021-08-27 22:13:49 +01:00
|
|
|
// shutdown race, don't care.
|
|
|
|
return
|
|
|
|
}
|
2020-07-25 01:32:18 +01:00
|
|
|
panic(fmt.Sprintf("device reconfig failed: %v", err))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(len(ms))
|
|
|
|
for i := range ms {
|
|
|
|
go func(myIdx int) {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case eps := <-ms[myIdx].epCh:
|
|
|
|
logf("conn%d endpoints update", myIdx+1)
|
|
|
|
updateEps(myIdx, eps)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}(i)
|
|
|
|
}
|
|
|
|
|
|
|
|
return func() {
|
|
|
|
cancel()
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-17 17:51:38 +01:00
|
|
|
func TestNewConn(t *testing.T) {
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
tstest.PanicOnLog()
|
2021-02-02 19:30:46 +00:00
|
|
|
tstest.ResourceCheck(t)
|
2020-03-03 21:50:47 +00:00
|
|
|
|
2020-02-05 22:16:58 +00:00
|
|
|
epCh := make(chan string, 16)
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
epFunc := func(endpoints []tailcfg.Endpoint) {
|
2020-02-05 22:16:58 +00:00
|
|
|
for _, ep := range endpoints {
|
tailcfg: add Endpoint, EndpointType, MapRequest.EndpointType
Track endpoints internally with a new tailcfg.Endpoint type that
includes a typed netaddr.IPPort (instead of just a string) and
includes a type for how that endpoint was discovered (STUN, local,
etc).
Use []tailcfg.Endpoint instead of []string internally.
At the last second, send it to the control server as the existing
[]string for endpoints, but also include a new parallel
MapRequest.EndpointType []tailcfg.EndpointType, so the control server
can start filtering out less-important endpoint changes from
new-enough clients. Notably, STUN-discovered endpoints can be filtered
out from 1.6+ clients, as they can discover them amongst each other
via CallMeMaybe disco exchanges started over DERP. And STUN endpoints
change a lot, causing a lot of MapResposne updates. But portmapped
endpoints are worth keeping for now, as they they work right away
without requiring the firewall traversal extra RTT dance.
End result will be less control->client bandwidth. (despite negligible
increase in client->control bandwidth)
Updates tailscale/corp#1543
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2021-04-12 21:24:29 +01:00
|
|
|
epCh <- ep.Addr.String()
|
2020-02-05 22:16:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-12 21:14:48 +00:00
|
|
|
stunAddr, stunCleanupFn := stuntest.Serve(t)
|
2020-03-03 11:51:31 +00:00
|
|
|
defer stunCleanupFn()
|
2020-02-05 22:16:58 +00:00
|
|
|
|
|
|
|
port := pickPort(t)
|
2020-05-17 17:51:38 +01:00
|
|
|
conn, err := NewConn(Options{
|
2021-08-26 03:39:20 +01:00
|
|
|
Port: port,
|
|
|
|
EndpointsFunc: epFunc,
|
|
|
|
Logf: t.Logf,
|
2020-02-05 22:16:58 +00:00
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
defer conn.Close()
|
2020-05-17 17:51:38 +01:00
|
|
|
conn.SetDERPMap(stuntest.DERPMapOf(stunAddr.String()))
|
2020-12-30 01:22:56 +00:00
|
|
|
conn.SetPrivateKey(wgkey.Private(key.NewPrivate()))
|
2020-02-05 22:16:58 +00:00
|
|
|
|
|
|
|
go func() {
|
2020-02-18 16:57:11 +00:00
|
|
|
var pkt [64 << 10]byte
|
2020-02-05 22:16:58 +00:00
|
|
|
for {
|
2021-03-24 16:41:57 +00:00
|
|
|
_, _, err := conn.receiveIPv4(pkt[:])
|
2020-02-05 22:16:58 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-03-06 20:37:19 +00:00
|
|
|
timeout := time.After(10 * time.Second)
|
2020-02-05 22:16:58 +00:00
|
|
|
var endpoints []string
|
|
|
|
suffix := fmt.Sprintf(":%d", port)
|
|
|
|
collectEndpoints:
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case ep := <-epCh:
|
|
|
|
endpoints = append(endpoints, ep)
|
|
|
|
if strings.HasSuffix(ep, suffix) {
|
|
|
|
break collectEndpoints
|
|
|
|
}
|
2020-03-06 20:37:19 +00:00
|
|
|
case <-timeout:
|
2020-02-05 22:16:58 +00:00
|
|
|
t.Fatalf("timeout with endpoints: %v", endpoints)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-03 04:12:14 +00:00
|
|
|
// pickPort returns a UDP port number obtained by binding an ephemeral
// port on 127.0.0.1 and then releasing it again. The test is failed
// if the temporary socket cannot be opened.
func pickPort(t testing.TB) uint16 {
	t.Helper()

	pc, listenErr := net.ListenPacket("udp4", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatal(listenErr)
	}
	defer pc.Close()

	udpAddr := pc.LocalAddr().(*net.UDPAddr)
	return uint16(udpAddr.Port)
}
|
2020-02-18 21:32:04 +00:00
|
|
|
|
2020-03-04 06:21:56 +00:00
|
|
|
func TestPickDERPFallback(t *testing.T) {
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
tstest.PanicOnLog()
|
2021-02-02 19:30:46 +00:00
|
|
|
tstest.ResourceCheck(t)
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
|
2020-05-17 17:51:38 +01:00
|
|
|
c := newConn()
|
2021-06-25 19:44:40 +01:00
|
|
|
dm := &tailcfg.DERPMap{
|
|
|
|
Regions: map[int]*tailcfg.DERPRegion{
|
|
|
|
1: &tailcfg.DERPRegion{},
|
|
|
|
2: &tailcfg.DERPRegion{},
|
|
|
|
3: &tailcfg.DERPRegion{},
|
|
|
|
4: &tailcfg.DERPRegion{},
|
|
|
|
5: &tailcfg.DERPRegion{},
|
|
|
|
6: &tailcfg.DERPRegion{},
|
|
|
|
7: &tailcfg.DERPRegion{},
|
|
|
|
8: &tailcfg.DERPRegion{},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
c.derpMap = dm
|
2020-03-04 06:21:56 +00:00
|
|
|
a := c.pickDERPFallback()
|
|
|
|
if a == 0 {
|
|
|
|
t.Fatalf("pickDERPFallback returned 0")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that it's consistent.
|
|
|
|
for i := 0; i < 50; i++ {
|
|
|
|
b := c.pickDERPFallback()
|
|
|
|
if a != b {
|
|
|
|
t.Fatalf("got inconsistent %d vs %d values", a, b)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that the pointer value of c is blended in and
|
|
|
|
// distribution over nodes works.
|
|
|
|
got := map[int]int{}
|
|
|
|
for i := 0; i < 50; i++ {
|
2020-05-17 17:51:38 +01:00
|
|
|
c = newConn()
|
2021-06-25 19:44:40 +01:00
|
|
|
c.derpMap = dm
|
2020-03-04 06:21:56 +00:00
|
|
|
got[c.pickDERPFallback()]++
|
|
|
|
}
|
|
|
|
t.Logf("distribution: %v", got)
|
|
|
|
if len(got) < 2 {
|
|
|
|
t.Errorf("expected more than 1 node; got %v", got)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test that stickiness works.
|
|
|
|
const someNode = 123456
|
|
|
|
c.myDerp = someNode
|
|
|
|
if got := c.pickDERPFallback(); got != someNode {
|
|
|
|
t.Errorf("not sticky: got %v; want %v", got, someNode)
|
|
|
|
}
|
2020-03-25 18:14:29 +00:00
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
// TODO: test that disco-based clients changing to a new DERP
|
|
|
|
// region causes this fallback to also move, once disco clients
|
|
|
|
// have fixed DERP fallback logic.
|
2020-03-03 15:39:40 +00:00
|
|
|
}
|
|
|
|
|
2020-03-07 01:50:36 +00:00
|
|
|
// TestDeviceStartStop exercises the startup and shutdown logic of
|
|
|
|
// wireguard-go, which is intimately intertwined with magicsock's own
|
|
|
|
// lifecycle. We seem to be good at generating deadlocks here, so if
|
|
|
|
// this test fails you should suspect a deadlock somewhere in startup
|
|
|
|
// or shutdown. It may be an infrequent flake, so run with
|
|
|
|
// -count=10000 to be sure.
|
|
|
|
func TestDeviceStartStop(t *testing.T) {
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
tstest.PanicOnLog()
|
2021-02-02 19:30:46 +00:00
|
|
|
tstest.ResourceCheck(t)
|
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
|
|
|
|
2020-05-17 17:51:38 +01:00
|
|
|
conn, err := NewConn(Options{
|
2021-08-26 03:39:20 +01:00
|
|
|
EndpointsFunc: func(eps []tailcfg.Endpoint) {},
|
|
|
|
Logf: t.Logf,
|
2020-03-07 01:50:36 +00:00
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
defer conn.Close()
|
|
|
|
|
|
|
|
tun := tuntest.NewChannelTUN()
|
2021-03-23 18:39:06 +00:00
|
|
|
wgLogger := wglog.NewLogger(t.Logf)
|
2021-05-11 23:24:37 +01:00
|
|
|
dev := device.NewDevice(tun.TUN(), conn.Bind(), wgLogger.DeviceLogger)
|
2020-03-07 01:50:36 +00:00
|
|
|
dev.Up()
|
|
|
|
dev.Close()
|
|
|
|
}
|
|
|
|
|
2021-01-11 01:22:11 +00:00
|
|
|
// Exercise a code path in sendDiscoMessage if the connection has been closed.
|
|
|
|
func TestConnClosed(t *testing.T) {
|
|
|
|
mstun := &natlab.Machine{Name: "stun"}
|
|
|
|
m1 := &natlab.Machine{Name: "m1"}
|
|
|
|
m2 := &natlab.Machine{Name: "m2"}
|
|
|
|
inet := natlab.NewInternet()
|
|
|
|
sif := mstun.Attach("eth0", inet)
|
|
|
|
m1if := m1.Attach("eth0", inet)
|
|
|
|
m2if := m2.Attach("eth0", inet)
|
|
|
|
|
|
|
|
d := &devices{
|
|
|
|
m1: m1,
|
|
|
|
m1IP: m1if.V4(),
|
|
|
|
m2: m2,
|
|
|
|
m2IP: m2if.V4(),
|
|
|
|
stun: mstun,
|
|
|
|
stunIP: sif.V4(),
|
|
|
|
}
|
|
|
|
|
2021-01-15 01:39:36 +00:00
|
|
|
logf, closeLogf := logger.LogfCloser(t.Logf)
|
|
|
|
defer closeLogf()
|
|
|
|
|
|
|
|
derpMap, cleanup := runDERPAndStun(t, logf, d.stun, d.stunIP)
|
2021-01-11 01:22:11 +00:00
|
|
|
defer cleanup()
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
ms1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap)
|
2021-01-11 01:22:11 +00:00
|
|
|
defer ms1.Close()
|
2021-08-26 03:39:20 +01:00
|
|
|
ms2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap)
|
2021-01-11 01:22:11 +00:00
|
|
|
defer ms2.Close()
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
cleanup = meshStacks(t.Logf, nil, ms1, ms2)
|
2021-01-11 01:22:11 +00:00
|
|
|
defer cleanup()
|
|
|
|
|
2021-03-29 21:50:44 +01:00
|
|
|
pkt := tuntest.Ping(ms2.IP().IPAddr().IP, ms1.IP().IPAddr().IP)
|
2021-01-11 01:22:11 +00:00
|
|
|
|
|
|
|
if len(ms1.conn.activeDerp) == 0 {
|
|
|
|
t.Errorf("unexpected DERP empty got: %v want: >0", len(ms1.conn.activeDerp))
|
|
|
|
}
|
|
|
|
|
|
|
|
ms1.conn.Close()
|
|
|
|
ms2.conn.Close()
|
|
|
|
|
|
|
|
// This should hit a c.closed conditional in sendDiscoMessage() and return immediately.
|
|
|
|
ms1.tun.Outbound <- pkt
|
|
|
|
select {
|
|
|
|
case <-ms2.tun.Inbound:
|
|
|
|
t.Error("unexpected response with connection closed")
|
|
|
|
case <-time.After(100 * time.Millisecond):
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(ms1.conn.activeDerp) > 0 {
|
|
|
|
t.Errorf("unexpected DERP active got: %v want:0", len(ms1.conn.activeDerp))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-14 04:44:58 +01:00
|
|
|
func makeNestable(t *testing.T) (logf logger.Logf, setT func(t *testing.T)) {
|
2020-06-22 09:54:59 +01:00
|
|
|
var mu sync.RWMutex
|
2020-05-14 04:44:58 +01:00
|
|
|
cur := t
|
|
|
|
|
|
|
|
setT = func(t *testing.T) {
|
|
|
|
mu.Lock()
|
|
|
|
cur = t
|
|
|
|
mu.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
logf = func(s string, args ...interface{}) {
|
2020-06-22 09:54:59 +01:00
|
|
|
mu.RLock()
|
2020-05-14 04:44:58 +01:00
|
|
|
t := cur
|
|
|
|
|
|
|
|
t.Helper()
|
|
|
|
t.Logf(s, args...)
|
2020-06-22 09:54:59 +01:00
|
|
|
mu.RUnlock()
|
2020-05-14 04:44:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return logf, setT
|
|
|
|
}
|
|
|
|
|
2021-08-26 07:33:46 +01:00
|
|
|
// localhostListener is a nettype.PacketListener that binds to the
// loopback address (127.0.0.1 for "udp4", ::1 for "udp6") whenever
// it is asked to listen on the unspecified address.
//
// Tests that only need localhost-to-localhost traffic use it because
// listening on the unspecified address on macOS and Windows triggers
// an interactive firewall consent prompt, which breaks our CIs.
type localhostListener struct{}

// ListenPacket opens a packet listener on the given network and
// address. For "udp4" and "udp6", an empty or unspecified host is
// replaced with that family's loopback address, an explicit loopback
// host is accepted as is, and any other host is rejected with an
// error. Other networks are passed through unchanged.
func (localhostListener) ListenPacket(ctx context.Context, network, address string) (net.PacketConn, error) {
	host, port, err := net.SplitHostPort(address)
	if err != nil {
		return nil, err
	}

	// Select the unspecified/loopback pair for the address family.
	// loopback stays empty for networks we do not rewrite.
	var unspecified, loopback string
	switch network {
	case "udp4":
		unspecified, loopback = "0.0.0.0", "127.0.0.1"
	case "udp6":
		unspecified, loopback = "::", "::1"
	}

	if loopback != "" {
		switch host {
		case loopback:
			// Already loopback; nothing to rewrite.
		case "", unspecified:
			host = loopback
		default:
			return nil, fmt.Errorf("localhostListener cannot be asked to listen on %q", address)
		}
	}

	var lc net.ListenConfig
	return lc.ListenPacket(ctx, network, net.JoinHostPort(host, port))
}
|
|
|
|
|
2020-03-03 15:39:40 +00:00
|
|
|
func TestTwoDevicePing(t *testing.T) {
|
2021-08-26 07:33:46 +01:00
|
|
|
l, ip := localhostListener{}, netaddr.IPv4(127, 0, 0, 1)
|
2020-07-27 16:21:17 +01:00
|
|
|
n := &devices{
|
|
|
|
m1: l,
|
|
|
|
m1IP: ip,
|
|
|
|
m2: l,
|
|
|
|
m2IP: ip,
|
|
|
|
stun: l,
|
|
|
|
stunIP: ip,
|
|
|
|
}
|
|
|
|
testTwoDevicePing(t, n)
|
|
|
|
}
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
// Legacy clients appear to new code as peers that know about DERP and
|
|
|
|
// WireGuard, but don't have a disco key. Check that we can still
|
|
|
|
// communicate successfully with such peers.
|
|
|
|
func TestNoDiscoKey(t *testing.T) {
|
|
|
|
tstest.PanicOnLog()
|
|
|
|
tstest.ResourceCheck(t)
|
|
|
|
|
2021-08-26 07:33:46 +01:00
|
|
|
derpMap, cleanup := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1))
|
2021-08-26 03:39:20 +01:00
|
|
|
defer cleanup()
|
|
|
|
|
2021-08-26 07:33:46 +01:00
|
|
|
m1 := newMagicStack(t, t.Logf, localhostListener{}, derpMap)
|
2021-08-26 03:39:20 +01:00
|
|
|
defer m1.Close()
|
2021-08-26 07:33:46 +01:00
|
|
|
m2 := newMagicStack(t, t.Logf, localhostListener{}, derpMap)
|
2021-08-26 03:39:20 +01:00
|
|
|
defer m2.Close()
|
|
|
|
|
|
|
|
removeDisco := func(idx int, nm *netmap.NetworkMap) {
|
|
|
|
for _, p := range nm.Peers {
|
|
|
|
p.DiscoKey = tailcfg.DiscoKey{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanupMesh := meshStacks(t.Logf, removeDisco, m1, m2)
|
|
|
|
defer cleanupMesh()
|
|
|
|
|
|
|
|
// Wait for both peers to know about each other before we try to
|
|
|
|
// ping.
|
|
|
|
for {
|
|
|
|
if s1 := m1.Status(); len(s1.Peer) != 1 {
|
|
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if s2 := m2.Status(); len(s2.Peer) != 1 {
|
|
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
pkt := tuntest.Ping(m2.IP().IPAddr().IP, m1.IP().IPAddr().IP)
|
|
|
|
m1.tun.Outbound <- pkt
|
|
|
|
select {
|
|
|
|
case <-m2.tun.Inbound:
|
|
|
|
t.Logf("ping m1>m2 ok")
|
|
|
|
case <-time.After(10 * time.Second):
|
|
|
|
t.Fatalf("timed out waiting for ping to transit")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-06 18:18:12 +01:00
|
|
|
func TestDiscokeyChange(t *testing.T) {
|
|
|
|
tstest.PanicOnLog()
|
|
|
|
tstest.ResourceCheck(t)
|
|
|
|
|
|
|
|
derpMap, cleanup := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1))
|
|
|
|
defer cleanup()
|
|
|
|
|
|
|
|
m1Key, err := wgkey.NewPrivate()
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("generating nodekey: %v", err)
|
|
|
|
}
|
|
|
|
m1 := newMagicStackWithKey(t, t.Logf, localhostListener{}, derpMap, m1Key)
|
|
|
|
defer m1.Close()
|
|
|
|
m2 := newMagicStack(t, t.Logf, localhostListener{}, derpMap)
|
|
|
|
defer m2.Close()
|
|
|
|
|
|
|
|
var (
|
|
|
|
mu sync.Mutex
|
|
|
|
// Start with some random discoKey that isn't actually m1's key,
|
|
|
|
// to simulate m2 coming up with knowledge of an old, expired
|
|
|
|
// discokey. We'll switch to the correct one later in the test.
|
|
|
|
m1DiscoKey = tailcfg.DiscoKey(key.NewPrivate().Public())
|
|
|
|
)
|
|
|
|
setm1Key := func(idx int, nm *netmap.NetworkMap) {
|
|
|
|
if idx != 1 {
|
|
|
|
// only mutate m2's netmap
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if len(nm.Peers) != 1 {
|
|
|
|
// m1 not in netmap yet.
|
|
|
|
return
|
|
|
|
}
|
|
|
|
mu.Lock()
|
|
|
|
defer mu.Unlock()
|
|
|
|
nm.Peers[0].DiscoKey = m1DiscoKey
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanupMesh := meshStacks(t.Logf, setm1Key, m1, m2)
|
|
|
|
defer cleanupMesh()
|
|
|
|
|
|
|
|
// Wait for both peers to know about each other.
|
|
|
|
for {
|
|
|
|
if s1 := m1.Status(); len(s1.Peer) != 1 {
|
|
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if s2 := m2.Status(); len(s2.Peer) != 1 {
|
|
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
mu.Lock()
|
|
|
|
m1DiscoKey = m1.conn.DiscoPublicKey()
|
|
|
|
mu.Unlock()
|
|
|
|
|
|
|
|
// Manually trigger an endpoint update to meshStacks, so it hands
|
|
|
|
// m2 a new netmap.
|
|
|
|
m1.conn.mu.Lock()
|
|
|
|
m1.epCh <- m1.conn.lastEndpoints
|
|
|
|
m1.conn.mu.Unlock()
|
|
|
|
|
|
|
|
cleanup = newPinger(t, t.Logf, m1, m2)
|
|
|
|
defer cleanup()
|
|
|
|
|
|
|
|
mustDirect(t, t.Logf, m1, m2)
|
|
|
|
mustDirect(t, t.Logf, m2, m1)
|
|
|
|
}
|
|
|
|
|
2020-07-27 16:21:17 +01:00
|
|
|
func TestActiveDiscovery(t *testing.T) {
|
|
|
|
t.Run("simple_internet", func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
mstun := &natlab.Machine{Name: "stun"}
|
|
|
|
m1 := &natlab.Machine{Name: "m1"}
|
|
|
|
m2 := &natlab.Machine{Name: "m2"}
|
|
|
|
inet := natlab.NewInternet()
|
|
|
|
sif := mstun.Attach("eth0", inet)
|
|
|
|
m1if := m1.Attach("eth0", inet)
|
|
|
|
m2if := m2.Attach("eth0", inet)
|
|
|
|
|
2020-07-11 07:48:08 +01:00
|
|
|
n := &devices{
|
2020-07-27 16:21:17 +01:00
|
|
|
m1: m1,
|
|
|
|
m1IP: m1if.V4(),
|
|
|
|
m2: m2,
|
|
|
|
m2IP: m2if.V4(),
|
|
|
|
stun: mstun,
|
|
|
|
stunIP: sif.V4(),
|
2020-07-11 07:48:08 +01:00
|
|
|
}
|
2020-07-27 16:21:17 +01:00
|
|
|
testActiveDiscovery(t, n)
|
2020-07-10 22:26:04 +01:00
|
|
|
})
|
2020-07-11 08:03:19 +01:00
|
|
|
|
2020-07-27 21:34:41 +01:00
|
|
|
t.Run("facing_easy_firewalls", func(t *testing.T) {
|
2020-07-27 16:21:17 +01:00
|
|
|
mstun := &natlab.Machine{Name: "stun"}
|
|
|
|
m1 := &natlab.Machine{
|
|
|
|
Name: "m1",
|
|
|
|
PacketHandler: &natlab.Firewall{},
|
|
|
|
}
|
|
|
|
m2 := &natlab.Machine{
|
|
|
|
Name: "m2",
|
|
|
|
PacketHandler: &natlab.Firewall{},
|
|
|
|
}
|
|
|
|
inet := natlab.NewInternet()
|
|
|
|
sif := mstun.Attach("eth0", inet)
|
|
|
|
m1if := m1.Attach("eth0", inet)
|
|
|
|
m2if := m2.Attach("eth0", inet)
|
|
|
|
|
|
|
|
n := &devices{
|
|
|
|
m1: m1,
|
|
|
|
m1IP: m1if.V4(),
|
|
|
|
m2: m2,
|
|
|
|
m2IP: m2if.V4(),
|
|
|
|
stun: mstun,
|
|
|
|
stunIP: sif.V4(),
|
|
|
|
}
|
|
|
|
testActiveDiscovery(t, n)
|
2020-07-10 22:26:04 +01:00
|
|
|
})
|
2020-07-27 16:21:17 +01:00
|
|
|
|
|
|
|
t.Run("facing_nats", func(t *testing.T) {
|
|
|
|
mstun := &natlab.Machine{Name: "stun"}
|
|
|
|
m1 := &natlab.Machine{
|
|
|
|
Name: "m1",
|
|
|
|
PacketHandler: &natlab.Firewall{},
|
|
|
|
}
|
|
|
|
nat1 := &natlab.Machine{
|
|
|
|
Name: "nat1",
|
|
|
|
}
|
|
|
|
m2 := &natlab.Machine{
|
|
|
|
Name: "m2",
|
|
|
|
PacketHandler: &natlab.Firewall{},
|
|
|
|
}
|
|
|
|
nat2 := &natlab.Machine{
|
|
|
|
Name: "nat2",
|
|
|
|
}
|
|
|
|
|
|
|
|
inet := natlab.NewInternet()
|
|
|
|
lan1 := &natlab.Network{
|
|
|
|
Name: "lan1",
|
2021-08-26 03:39:20 +01:00
|
|
|
Prefix4: netaddr.MustParseIPPrefix("192.168.0.0/24"),
|
2020-07-27 16:21:17 +01:00
|
|
|
}
|
|
|
|
lan2 := &natlab.Network{
|
|
|
|
Name: "lan2",
|
2021-08-26 03:39:20 +01:00
|
|
|
Prefix4: netaddr.MustParseIPPrefix("192.168.1.0/24"),
|
2020-07-27 16:21:17 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
sif := mstun.Attach("eth0", inet)
|
|
|
|
nat1WAN := nat1.Attach("wan", inet)
|
|
|
|
nat1LAN := nat1.Attach("lan1", lan1)
|
|
|
|
nat2WAN := nat2.Attach("wan", inet)
|
|
|
|
nat2LAN := nat2.Attach("lan2", lan2)
|
|
|
|
m1if := m1.Attach("eth0", lan1)
|
|
|
|
m2if := m2.Attach("eth0", lan2)
|
|
|
|
lan1.SetDefaultGateway(nat1LAN)
|
|
|
|
lan2.SetDefaultGateway(nat2LAN)
|
|
|
|
|
|
|
|
nat1.PacketHandler = &natlab.SNAT44{
|
|
|
|
Machine: nat1,
|
|
|
|
ExternalInterface: nat1WAN,
|
|
|
|
Firewall: &natlab.Firewall{
|
|
|
|
TrustedInterface: nat1LAN,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
nat2.PacketHandler = &natlab.SNAT44{
|
|
|
|
Machine: nat2,
|
|
|
|
ExternalInterface: nat2WAN,
|
|
|
|
Firewall: &natlab.Firewall{
|
|
|
|
TrustedInterface: nat2LAN,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
n := &devices{
|
|
|
|
m1: m1,
|
|
|
|
m1IP: m1if.V4(),
|
|
|
|
m2: m2,
|
|
|
|
m2IP: m2if.V4(),
|
|
|
|
stun: mstun,
|
|
|
|
stunIP: sif.V4(),
|
|
|
|
}
|
|
|
|
testActiveDiscovery(t, n)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-07-11 07:48:08 +01:00
|
|
|
type devices struct {
|
|
|
|
m1 nettype.PacketListener
|
|
|
|
m1IP netaddr.IP
|
|
|
|
|
|
|
|
m2 nettype.PacketListener
|
|
|
|
m2IP netaddr.IP
|
|
|
|
|
|
|
|
stun nettype.PacketListener
|
|
|
|
stunIP netaddr.IP
|
|
|
|
}
|
|
|
|
|
2020-07-27 16:09:54 +01:00
|
|
|
// newPinger starts continuously sending test packets from srcM to
|
|
|
|
// dstM, until cleanup is invoked to stop it. Each ping has 1 second
|
|
|
|
// to transit the network. It is a test failure to lose a ping.
|
2020-07-27 21:25:25 +01:00
|
|
|
func newPinger(t *testing.T, logf logger.Logf, src, dst *magicStack) (cleanup func()) {
|
2020-07-27 16:09:54 +01:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
done := make(chan struct{})
|
|
|
|
one := func() bool {
|
|
|
|
// TODO(danderson): requiring exactly zero packet loss
|
|
|
|
// will probably be too strict for some tests we'd like to
|
|
|
|
// run (e.g. discovery switching to a new path on
|
|
|
|
// failure). Figure out what kind of thing would be
|
|
|
|
// acceptable to test instead of "every ping must
|
|
|
|
// transit".
|
2021-03-29 21:50:44 +01:00
|
|
|
pkt := tuntest.Ping(dst.IP().IPAddr().IP, src.IP().IPAddr().IP)
|
2020-07-27 21:32:45 +01:00
|
|
|
select {
|
|
|
|
case src.tun.Outbound <- pkt:
|
|
|
|
case <-ctx.Done():
|
|
|
|
return false
|
|
|
|
}
|
2020-07-27 16:09:54 +01:00
|
|
|
select {
|
2020-07-27 21:25:25 +01:00
|
|
|
case <-dst.tun.Inbound:
|
2020-07-27 16:09:54 +01:00
|
|
|
return true
|
2020-07-27 17:20:31 +01:00
|
|
|
case <-time.After(10 * time.Second):
|
|
|
|
// Very generous timeout here because depending on
|
|
|
|
// magicsock setup races, the first handshake might get
|
|
|
|
// eaten by the receiving end (if wireguard-go hasn't been
|
|
|
|
// configured quite yet), so we have to wait for at least
|
|
|
|
// the first retransmit from wireguard before we declare
|
|
|
|
// failure.
|
2020-07-27 16:09:54 +01:00
|
|
|
t.Errorf("timed out waiting for ping to transit")
|
|
|
|
return true
|
|
|
|
case <-ctx.Done():
|
2020-07-27 20:46:34 +01:00
|
|
|
// Try a little bit longer to consume the packet we're
|
|
|
|
// waiting for. This is to deal with shutdown races, where
|
|
|
|
// natlab may still be delivering a packet to us from a
|
|
|
|
// goroutine.
|
|
|
|
select {
|
2020-07-27 21:25:25 +01:00
|
|
|
case <-dst.tun.Inbound:
|
2020-07-27 20:46:34 +01:00
|
|
|
case <-time.After(time.Second):
|
|
|
|
}
|
2020-07-27 16:09:54 +01:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup = func() {
|
|
|
|
cancel()
|
|
|
|
<-done
|
|
|
|
}
|
|
|
|
|
|
|
|
// Synchronously transit one ping to get things started. This is
|
|
|
|
// nice because it means that newPinger returning means we've
|
|
|
|
// worked through initial connectivity.
|
|
|
|
if !one() {
|
|
|
|
cleanup()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
go func() {
|
2021-03-29 21:50:44 +01:00
|
|
|
logf("sending ping stream from %s (%s) to %s (%s)", src, src.IP(), dst, dst.IP())
|
2020-07-27 16:09:54 +01:00
|
|
|
defer close(done)
|
|
|
|
for one() {
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return cleanup
|
|
|
|
}
|
|
|
|
|
2020-07-27 21:33:09 +01:00
|
|
|
// testActiveDiscovery verifies that two magicStacks tied to the given
|
|
|
|
// devices can establish a direct p2p connection with each other. See
|
|
|
|
// TestActiveDiscovery for the various configurations of devices that
|
|
|
|
// get exercised.
|
2020-07-25 01:32:18 +01:00
|
|
|
func testActiveDiscovery(t *testing.T, d *devices) {
|
|
|
|
tstest.PanicOnLog()
|
2021-02-02 19:30:46 +00:00
|
|
|
tstest.ResourceCheck(t)
|
2020-07-25 01:32:18 +01:00
|
|
|
|
|
|
|
tlogf, setT := makeNestable(t)
|
|
|
|
setT(t)
|
|
|
|
|
|
|
|
start := time.Now()
|
2021-01-16 02:19:20 +00:00
|
|
|
wlogf := func(msg string, args ...interface{}) {
|
2020-09-12 00:26:05 +01:00
|
|
|
t.Helper()
|
|
|
|
msg = fmt.Sprintf("%s: %s", time.Since(start).Truncate(time.Microsecond), msg)
|
2020-07-25 01:32:18 +01:00
|
|
|
tlogf(msg, args...)
|
|
|
|
}
|
2021-01-16 02:19:20 +00:00
|
|
|
logf, closeLogf := logger.LogfCloser(wlogf)
|
|
|
|
defer closeLogf()
|
2020-07-25 01:32:18 +01:00
|
|
|
|
|
|
|
derpMap, cleanup := runDERPAndStun(t, logf, d.stun, d.stunIP)
|
|
|
|
defer cleanup()
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
m1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap)
|
2020-07-25 01:32:18 +01:00
|
|
|
defer m1.Close()
|
2021-08-26 03:39:20 +01:00
|
|
|
m2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap)
|
2020-07-25 01:32:18 +01:00
|
|
|
defer m2.Close()
|
|
|
|
|
2021-08-26 03:39:20 +01:00
|
|
|
cleanup = meshStacks(logf, nil, m1, m2)
|
2020-07-25 01:32:18 +01:00
|
|
|
defer cleanup()
|
|
|
|
|
2021-03-29 21:50:44 +01:00
|
|
|
m1IP := m1.IP()
|
|
|
|
m2IP := m2.IP()
|
2020-07-25 01:32:18 +01:00
|
|
|
logf("IPs: %s %s", m1IP, m2IP)
|
|
|
|
|
2020-07-27 21:25:25 +01:00
|
|
|
cleanup = newPinger(t, logf, m1, m2)
|
2020-07-27 16:09:54 +01:00
|
|
|
defer cleanup()
|
|
|
|
|
|
|
|
// Everything is now up and running, active discovery should find
|
|
|
|
// a direct path between our peers. Wait for it to switch away
|
|
|
|
// from DERP.
|
2021-10-06 18:18:12 +01:00
|
|
|
mustDirect(t, logf, m1, m2)
|
|
|
|
mustDirect(t, logf, m2, m1)
|
2020-07-27 16:09:54 +01:00
|
|
|
|
2021-10-06 18:18:12 +01:00
|
|
|
logf("starting cleanup")
|
|
|
|
}
|
|
|
|
|
|
|
|
func mustDirect(t *testing.T, logf logger.Logf, m1, m2 *magicStack) {
|
|
|
|
lastLog := time.Now().Add(-time.Minute)
|
|
|
|
// See https://github.com/tailscale/tailscale/issues/654 for a discussion of this deadline.
|
|
|
|
for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
|
|
|
|
pst := m1.Status().Peer[m2.Public()]
|
|
|
|
if pst.CurAddr != "" {
|
|
|
|
logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if now := time.Now(); now.Sub(lastLog) > time.Second {
|
|
|
|
logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs)
|
|
|
|
lastLog = now
|
2020-07-27 16:09:54 +01:00
|
|
|
}
|
2020-07-25 01:32:18 +01:00
|
|
|
}
|
2021-10-06 18:18:12 +01:00
|
|
|
t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2)
|
|