tailscale/ipn/ipnlocal/state_test.go

1010 lines
31 KiB
Go

// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ipnlocal
import (
"context"
"sync"
"testing"
"time"
qt "github.com/frankban/quicktest"
"tailscale.com/control/controlclient"
"tailscale.com/ipn"
"tailscale.com/ipn/store/mem"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/types/empty"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/netmap"
"tailscale.com/types/persist"
"tailscale.com/wgengine"
)
// notifyThrottler receives notifications from an ipn.Backend, blocking
// (with eventual timeout and t.Fatal) if there are too many and complaining
// (also with t.Fatal) if they are too few.
type notifyThrottler struct {
t *testing.T
// ch gets replaced frequently. Lock the mutex before getting or
// setting it, but not while waiting on it.
mu sync.Mutex
ch chan ipn.Notify
}
// expect tells the throttler to expect count upcoming notifications.
func (nt *notifyThrottler) expect(count int) {
nt.mu.Lock()
nt.ch = make(chan ipn.Notify, count)
nt.mu.Unlock()
}
// put adds one notification into the throttler's queue.
func (nt *notifyThrottler) put(n ipn.Notify) {
nt.mu.Lock()
ch := nt.ch
nt.mu.Unlock()
select {
case ch <- n:
return
default:
nt.t.Fatalf("put: channel full: %v", n)
}
}
// drain pulls the notifications out of the queue, asserting that there are
// exactly count notifications that have been put so far.
func (nt *notifyThrottler) drain(count int) []ipn.Notify {
nt.t.Helper()
nt.mu.Lock()
ch := nt.ch
nt.mu.Unlock()
nn := []ipn.Notify{}
for i := 0; i < count; i++ {
select {
case n := <-ch:
nn = append(nn, n)
case <-time.After(6 * time.Second):
nt.t.Fatalf("drain: channel empty after %d/%d", i, count)
}
}
// no more notifications expected
close(ch)
return nn
}
// mockControl is a mock implementation of controlclient.Client.
// Much of the backend state machine depends on callbacks and state
// in the controlclient.Client, so by controlling it, we can check that
// the state machine works as expected.
type mockControl struct {
tb testing.TB
opts controlclient.Options
logfActual logger.Logf
statusFunc func(controlclient.Status)
preventLog syncs.AtomicBool
mu sync.Mutex
calls []string
authBlocked bool
persist persist.Persist
machineKey key.MachinePrivate
}
func newMockControl(tb testing.TB) *mockControl {
return &mockControl{
tb: tb,
authBlocked: true,
}
}
func (cc *mockControl) logf(format string, args ...any) {
if cc.preventLog.Get() || cc.logfActual == nil {
return
}
cc.logfActual(format, args...)
}
func (cc *mockControl) populateKeys() (newKeys bool) {
cc.mu.Lock()
defer cc.mu.Unlock()
if cc.machineKey.IsZero() {
cc.logf("Copying machineKey.")
cc.machineKey, _ = cc.opts.GetMachinePrivateKey()
newKeys = true
}
if cc.persist.PrivateNodeKey.IsZero() {
cc.logf("Generating a new nodekey.")
cc.persist.OldPrivateNodeKey = cc.persist.PrivateNodeKey
cc.persist.PrivateNodeKey = key.NewNode()
newKeys = true
}
return newKeys
}
// send publishes a controlclient.Status notification upstream.
// (In our tests here, upstream is the ipnlocal.Local instance.)
func (cc *mockControl) send(err error, url string, loginFinished bool, nm *netmap.NetworkMap) {
if cc.statusFunc != nil {
s := controlclient.Status{
URL: url,
NetMap: nm,
Persist: &cc.persist,
Err: err,
}
if loginFinished {
s.LoginFinished = &empty.Message{}
} else if url == "" && err == nil && nm == nil {
s.LogoutFinished = &empty.Message{}
}
cc.statusFunc(s)
}
}
// called records that a particular function name was called.
func (cc *mockControl) called(s string) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.calls = append(cc.calls, s)
}
// assertCalls fails the test if the list of functions that have been called since the
// last time assertCall was run does not match want.
func (cc *mockControl) assertCalls(want ...string) {
cc.tb.Helper()
cc.mu.Lock()
defer cc.mu.Unlock()
qt.Assert(cc.tb, cc.calls, qt.DeepEquals, want)
cc.calls = nil
}
// setAuthBlocked changes the return value of AuthCantContinue.
// Auth is blocked if you haven't called Login, the control server hasn't
// provided an auth URL, or it has provided an auth URL and you haven't
// visited it yet.
func (cc *mockControl) setAuthBlocked(blocked bool) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.authBlocked = blocked
}
// Shutdown disconnects the client.
//
// Note that in a normal controlclient, Shutdown would be the last thing you
// do before discarding the object. In this mock, we don't actually discard
// the object, but if you see a call to Shutdown, you should always see a
// call to New right after it, if the object continues to be used.
// (Note that "New" is the ccGen function here; it means ipn.Backend wanted
// to create an entirely new controlclient.)
func (cc *mockControl) Shutdown() {
cc.logf("Shutdown")
cc.called("Shutdown")
}
// Login starts a login process.
// Note that in this mock, we don't automatically generate notifications
// about the progress of the login operation. You have to call setAuthBlocked()
// and send() as required by the test.
func (cc *mockControl) Login(t *tailcfg.Oauth2Token, flags controlclient.LoginFlags) {
cc.logf("Login token=%v flags=%v", t, flags)
cc.called("Login")
newKeys := cc.populateKeys()
interact := (flags & controlclient.LoginInteractive) != 0
cc.logf("Login: interact=%v newKeys=%v", interact, newKeys)
cc.setAuthBlocked(interact || newKeys)
}
func (cc *mockControl) StartLogout() {
cc.logf("StartLogout")
cc.called("StartLogout")
}
func (cc *mockControl) Logout(ctx context.Context) error {
cc.logf("Logout")
cc.called("Logout")
return nil
}
func (cc *mockControl) SetPaused(paused bool) {
cc.logf("SetPaused=%v", paused)
if paused {
cc.called("pause")
} else {
cc.called("unpause")
}
}
func (cc *mockControl) AuthCantContinue() bool {
cc.mu.Lock()
defer cc.mu.Unlock()
return cc.authBlocked
}
func (cc *mockControl) SetHostinfo(hi *tailcfg.Hostinfo) {
cc.logf("SetHostinfo: %v", *hi)
cc.called("SetHostinfo")
}
func (cc *mockControl) SetNetInfo(ni *tailcfg.NetInfo) {
cc.called("SetNetinfo")
cc.logf("SetNetInfo: %v", *ni)
cc.called("SetNetInfo")
}
func (cc *mockControl) UpdateEndpoints(endpoints []tailcfg.Endpoint) {
// validate endpoint information here?
cc.logf("UpdateEndpoints: ep=%v", endpoints)
cc.called("UpdateEndpoints")
}
// A very precise test of the sequence of function calls generated by
// ipnlocal.Local into its controlclient instance, and the events it
// produces upstream into the UI.
//
// [apenwarr] Normally I'm not a fan of "mock" style tests, but the precise
// sequence of this state machine is so important for writing our multiple
// frontends, that it's worth validating it all in one place.
//
// Any changes that affect this test will most likely require carefully
// re-testing all our GUIs (and the CLI) to make sure we didn't break
// anything.
//
// Note also that this test doesn't have any timers, goroutines, or duplicate
// detection. It expects messages to be produced in exactly the right order,
// with no duplicates, without doing network activity (other than through
// controlclient, which we fake, so there's no network activity there either).
//
// TODO: A few messages that depend on magicsock (which actually might have
// network delays) are just ignored for now, which makes the test
// predictable, but maybe a bit less thorough. This is more of an overall
// state machine test than a test of the wgengine+magicsock integration.
func TestStateMachine(t *testing.T) {
c := qt.New(t)
logf := t.Logf
store := new(testStateStorage)
e, err := wgengine.NewFakeUserspaceEngine(logf, 0)
if err != nil {
t.Fatalf("NewFakeUserspaceEngine: %v", err)
}
t.Cleanup(e.Close)
b, err := NewLocalBackend(logf, "logid", store, nil, e, 0)
if err != nil {
t.Fatalf("NewLocalBackend: %v", err)
}
cc := newMockControl(t)
cc.statusFunc = b.setClientStatus
t.Cleanup(func() { cc.preventLog.Set(true) }) // hacky way to pacify issue 3020
b.SetControlClientGetterForTesting(func(opts controlclient.Options) (controlclient.Client, error) {
cc.mu.Lock()
cc.opts = opts
cc.logfActual = opts.Logf
cc.authBlocked = true
cc.persist = cc.opts.Persist
cc.mu.Unlock()
cc.logf("ccGen: new mockControl.")
cc.called("New")
return cc, nil
})
notifies := &notifyThrottler{t: t}
notifies.expect(0)
b.SetNotifyCallback(func(n ipn.Notify) {
if cc.preventLog.Get() {
return
}
if n.State != nil ||
n.Prefs != nil ||
n.BrowseToURL != nil ||
n.LoginFinished != nil {
logf("\n%v\n\n", n)
notifies.put(n)
} else {
logf("\n(ignored) %v\n\n", n)
}
})
t.Cleanup(func() { b.SetNotifyCallback(nil) }) // hacky way to pacify issue 3020
// Check that it hasn't called us right away.
// The state machine should be idle until we call Start().
cc.assertCalls()
// Start the state machine.
// Since !WantRunning by default, it'll create a controlclient,
// but not ask it to do anything yet.
t.Logf("\n\nStart")
notifies.expect(2)
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
// BUG: strictly, it should pause, not unpause, here, since !WantRunning.
cc.assertCalls("New", "unpause")
nn := notifies.drain(2)
cc.assertCalls()
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[1].State, qt.IsNotNil)
prefs := *nn[0].Prefs
// Note: a totally fresh system has Prefs.LoggedOut=false by
// default. We are logged out, but not because the user asked
// for it, so it doesn't count as Prefs.LoggedOut==true.
c.Assert(nn[0].Prefs.LoggedOut, qt.IsFalse)
c.Assert(prefs.WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[1].State)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Restart the state machine.
// It's designed to handle frontends coming and going sporadically.
// Make the sure the restart not only works, but generates the same
// events as the first time, so UIs always know what to expect.
t.Logf("\n\nStart2")
notifies.expect(2)
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
// BUG: strictly, it should pause, not unpause, here, since !WantRunning.
cc.assertCalls("Shutdown", "unpause", "New", "unpause")
nn := notifies.drain(2)
cc.assertCalls()
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[1].State, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut, qt.IsFalse)
c.Assert(nn[0].Prefs.WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[1].State)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Start non-interactive login with no token.
// This will ask controlclient to start its own Login() process,
// then wait for us to respond.
t.Logf("\n\nLogin (noninteractive)")
notifies.expect(0)
b.Login(nil)
{
cc.assertCalls("Login")
notifies.drain(0)
// Note: WantRunning isn't true yet. It'll switch to true
// after a successful login finishes.
// (This behaviour is needed so that b.Login() won't
// start connecting to an old account right away, if one
// exists when you launch another login.)
}
// Attempted non-interactive login with no key; indicate that
// the user needs to visit a login URL.
t.Logf("\n\nLogin (url response)")
notifies.expect(1)
url1 := "http://localhost:1/1"
cc.send(nil, url1, false, nil)
{
cc.assertCalls("unpause")
// ...but backend eats that notification, because the user
// didn't explicitly request interactive login yet, and
// we're already in NeedsLogin state.
nn := notifies.drain(1)
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut, qt.IsFalse)
c.Assert(nn[0].Prefs.WantRunning, qt.IsFalse)
}
// Now we'll try an interactive login.
// Since we provided an interactive URL earlier, this shouldn't
// ask control to do anything. Instead backend will emit an event
// indicating that the UI should browse to the given URL.
t.Logf("\n\nLogin (interactive)")
notifies.expect(1)
b.StartLoginInteractive()
{
nn := notifies.drain(1)
cc.assertCalls("unpause")
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(url1, qt.Equals, *nn[0].BrowseToURL)
}
// Sometimes users press the Login button again, in the middle of
// a login sequence. For example, they might have closed their
// browser window without logging in, or they waited too long and
// the login URL expired. If they start another interactive login,
// we must always get a *new* login URL first.
t.Logf("\n\nLogin2 (interactive)")
notifies.expect(0)
b.StartLoginInteractive()
{
notifies.drain(0)
// backend asks control for another login sequence
cc.assertCalls("Login")
}
// Provide a new interactive login URL.
t.Logf("\n\nLogin2 (url response)")
notifies.expect(1)
url2 := "http://localhost:1/2"
cc.send(nil, url2, false, nil)
{
cc.assertCalls("unpause", "unpause")
// This time, backend should emit it to the UI right away,
// because the UI is anxiously awaiting a new URL to visit.
nn := notifies.drain(1)
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(url2, qt.Equals, *nn[0].BrowseToURL)
}
// Pretend that the interactive login actually happened.
// Controlclient always sends the netmap and LoginFinished at the
// same time.
// The backend should propagate this upward for the UI.
t.Logf("\n\nLoginFinished")
notifies.expect(3)
cc.setAuthBlocked(false)
cc.persist.LoginName = "user1"
cc.send(nil, "", true, &netmap.NetworkMap{})
{
nn := notifies.drain(3)
// Arguably it makes sense to unpause now, since the machine
// authorization status is part of the netmap.
//
// BUG: backend unblocks wgengine at this point, even though
// our machine key is not authorized. It probably should
// wait until it gets into Starting.
// TODO: (Currently this test doesn't detect that bug, but
// it's visible in the logs)
cc.assertCalls("unpause", "unpause", "unpause")
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[2].State, qt.IsNotNil)
c.Assert(nn[1].Prefs.Persist.LoginName, qt.Equals, "user1")
c.Assert(ipn.NeedsMachineAuth, qt.Equals, *nn[2].State)
}
// Pretend that the administrator has authorized our machine.
t.Logf("\n\nMachineAuthorized")
notifies.expect(1)
// BUG: the real controlclient sends LoginFinished with every
// notification while it's in StateAuthenticated, but not StateSynced.
// It should send it exactly once, or every time we're authenticated,
// but the current code is brittle.
// (ie. I suspect it would be better to change false->true in send()
// below, and do the same in the real controlclient.)
cc.send(nil, "", false, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
}
// TODO: add a fake DERP server to our fake netmap, so we can
// transition to the Running state here.
// TODO: test what happens when the admin forcibly deletes our key.
// (ie. unsolicited logout)
// TODO: test what happens when our key expires, client side.
// (and when it gets close to expiring)
// The user changes their preference to !WantRunning.
t.Logf("\n\nWantRunning -> false")
notifies.expect(2)
b.EditPrefs(&ipn.MaskedPrefs{
WantRunningSet: true,
Prefs: ipn.Prefs{WantRunning: false},
})
{
nn := notifies.drain(2)
cc.assertCalls("pause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Stopped, qt.Equals, *nn[0].State)
}
// The user changes their preference to WantRunning after all.
t.Logf("\n\nWantRunning -> true")
store.awaitWrite()
notifies.expect(2)
b.EditPrefs(&ipn.MaskedPrefs{
WantRunningSet: true,
Prefs: ipn.Prefs{WantRunning: true},
})
{
nn := notifies.drain(2)
// BUG: Login isn't needed here. We never logged out.
cc.assertCalls("Login", "unpause", "unpause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
c.Assert(store.sawWrite(), qt.IsTrue)
}
// Test the fast-path frontend reconnection.
// This one is very finicky, so we have to force State==Running
// or it won't use the fast path.
// TODO: actually get to State==Running, rather than cheating.
// That'll require spinning up a fake DERP server and putting it in
// the netmap.
t.Logf("\n\nFastpath Start()")
notifies.expect(1)
b.state = ipn.Running
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
nn := notifies.drain(1)
cc.assertCalls()
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[0].NetMap, qt.IsNotNil)
c.Assert(nn[0].Prefs, qt.IsNotNil)
}
// undo the state hack above.
b.state = ipn.Starting
// User wants to logout.
store.awaitWrite()
t.Logf("\n\nLogout (async)")
notifies.expect(2)
b.Logout()
{
nn := notifies.drain(2)
cc.assertCalls("pause", "StartLogout", "pause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Stopped, qt.Equals, *nn[0].State)
c.Assert(nn[1].Prefs.LoggedOut, qt.IsTrue)
c.Assert(nn[1].Prefs.WantRunning, qt.IsFalse)
c.Assert(ipn.Stopped, qt.Equals, b.State())
c.Assert(store.sawWrite(), qt.IsTrue)
}
// Let's make the logout succeed.
t.Logf("\n\nLogout (async) - succeed")
notifies.expect(1)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[0].State)
c.Assert(b.Prefs().LoggedOut, qt.IsTrue)
c.Assert(b.Prefs().WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// A second logout should do nothing, since the prefs haven't changed.
t.Logf("\n\nLogout2 (async)")
notifies.expect(0)
b.Logout()
{
notifies.drain(0)
// BUG: the backend has already called StartLogout, and we're
// still logged out. So it shouldn't call it again.
cc.assertCalls("StartLogout", "unpause")
cc.assertCalls()
c.Assert(b.Prefs().LoggedOut, qt.IsTrue)
c.Assert(b.Prefs().WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Let's acknowledge the second logout too.
t.Logf("\n\nLogout2 (async) - succeed")
notifies.expect(0)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
notifies.drain(0)
cc.assertCalls("unpause", "unpause")
c.Assert(b.Prefs().LoggedOut, qt.IsTrue)
c.Assert(b.Prefs().WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Try the synchronous logout feature.
t.Logf("\n\nLogout3 (sync)")
notifies.expect(0)
b.LogoutSync(context.Background())
// NOTE: This returns as soon as cc.Logout() returns, which is okay
// I guess, since that's supposed to be synchronous.
{
notifies.drain(0)
cc.assertCalls("Logout", "unpause")
c.Assert(b.Prefs().LoggedOut, qt.IsTrue)
c.Assert(b.Prefs().WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Generate the third logout event.
t.Logf("\n\nLogout3 (sync) - succeed")
notifies.expect(0)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
notifies.drain(0)
cc.assertCalls("unpause", "unpause")
c.Assert(b.Prefs().LoggedOut, qt.IsTrue)
c.Assert(b.Prefs().WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Oh, you thought we were done? Ha! Now we have to test what
// happens if the user exits and restarts while logged out.
// Note that it's explicitly okay to call b.Start() over and over
// again, every time the frontend reconnects.
// TODO: test user switching between statekeys.
// The frontend restarts!
t.Logf("\n\nStart3")
notifies.expect(2)
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
// BUG: We already called Shutdown(), no need to do it again.
// BUG: don't unpause because we're not logged in.
cc.assertCalls("Shutdown", "unpause", "New", "unpause")
nn := notifies.drain(2)
cc.assertCalls()
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[1].State, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut, qt.IsTrue)
c.Assert(nn[0].Prefs.WantRunning, qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[1].State)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Let's break the rules a little. Our control server accepts
// your invalid login attempt, with no need for an interactive login.
// (This simulates an admin reviving a key that you previously
// disabled.)
t.Logf("\n\nLoginFinished3")
notifies.expect(3)
cc.setAuthBlocked(false)
cc.persist.LoginName = "user2"
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(3)
cc.assertCalls("unpause", "unpause", "unpause")
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[2].State, qt.IsNotNil)
// Prefs after finishing the login, so LoginName updated.
c.Assert(nn[1].Prefs.Persist.LoginName, qt.Equals, "user2")
c.Assert(nn[1].Prefs.LoggedOut, qt.IsFalse)
c.Assert(nn[1].Prefs.WantRunning, qt.IsTrue)
c.Assert(ipn.Starting, qt.Equals, *nn[2].State)
}
// Now we've logged in successfully. Let's disconnect.
t.Logf("\n\nWantRunning -> false")
notifies.expect(2)
b.EditPrefs(&ipn.MaskedPrefs{
WantRunningSet: true,
Prefs: ipn.Prefs{WantRunning: false},
})
{
nn := notifies.drain(2)
cc.assertCalls("pause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Stopped, qt.Equals, *nn[0].State)
c.Assert(nn[1].Prefs.LoggedOut, qt.IsFalse)
}
// One more restart, this time with a valid key, but WantRunning=false.
t.Logf("\n\nStart4")
notifies.expect(2)
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
// NOTE: cc.Shutdown() is correct here, since we didn't call
// b.Shutdown() explicitly ourselves.
// Note: unpause happens because ipn needs to get at least one netmap
// on startup, otherwise UIs can't show the node list, login
// name, etc when in state ipn.Stopped.
// Arguably they shouldn't try. But they currently do.
nn := notifies.drain(2)
cc.assertCalls("Shutdown", "unpause", "New", "Login", "unpause")
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[1].State, qt.IsNotNil)
c.Assert(nn[0].Prefs.WantRunning, qt.IsFalse)
c.Assert(nn[0].Prefs.LoggedOut, qt.IsFalse)
c.Assert(ipn.Stopped, qt.Equals, *nn[1].State)
}
// When logged in but !WantRunning, ipn leaves us unpaused to retrieve
// the first netmap. Simulate that netmap being received, after which
// it should pause us, to avoid wasting CPU retrieving unnecessarily
// additional netmap updates.
//
// TODO: really the various GUIs and prefs should be refactored to
// not require the netmap structure at all when starting while
// !WantRunning. That would remove the need for this (or contacting
// the control server at all when stopped).
t.Logf("\n\nStart4 -> netmap")
notifies.expect(0)
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
notifies.drain(0)
cc.assertCalls("pause", "pause")
}
// Request connection.
// The state machine didn't call Login() earlier, so now it needs to.
t.Logf("\n\nWantRunning4 -> true")
notifies.expect(2)
b.EditPrefs(&ipn.MaskedPrefs{
WantRunningSet: true,
Prefs: ipn.Prefs{WantRunning: true},
})
{
nn := notifies.drain(2)
cc.assertCalls("Login", "unpause", "unpause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
}
// Disconnect.
t.Logf("\n\nStop")
notifies.expect(2)
b.EditPrefs(&ipn.MaskedPrefs{
WantRunningSet: true,
Prefs: ipn.Prefs{WantRunning: false},
})
{
nn := notifies.drain(2)
cc.assertCalls("pause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Stopped, qt.Equals, *nn[0].State)
}
// We want to try logging in as a different user, while Stopped.
// First, start the login process (without logging out first).
t.Logf("\n\nLoginDifferent")
notifies.expect(1)
b.StartLoginInteractive()
url3 := "http://localhost:1/3"
cc.send(nil, url3, false, nil)
{
nn := notifies.drain(1)
// It might seem like WantRunning should switch to true here,
// but that would be risky since we already have a valid
// user account. It might try to reconnect to the old account
// before the new one is ready. So no change yet.
//
// Because the login hasn't yet completed, the old login
// is still valid, so it's correct that we stay paused.
cc.assertCalls("Login", "pause", "pause")
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(*nn[0].BrowseToURL, qt.Equals, url3)
}
// Now, let's complete the interactive login, using a different
// user account than before. WantRunning changes to true after an
// interactive login, so we end up unpaused.
t.Logf("\n\nLoginDifferent URL visited")
notifies.expect(3)
cc.persist.LoginName = "user3"
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(3)
// BUG: pause() being called here is a bad sign.
// It means that either the state machine ran at least once
// with the old netmap, or it ran with the new login+netmap
// and !WantRunning. But since it's a fresh and successful
// new login, WantRunning is true, so there was never a
// reason to pause().
cc.assertCalls("pause", "unpause", "unpause")
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[2].State, qt.IsNotNil)
// Prefs after finishing the login, so LoginName updated.
c.Assert(nn[1].Prefs.Persist.LoginName, qt.Equals, "user3")
c.Assert(nn[1].Prefs.LoggedOut, qt.IsFalse)
c.Assert(nn[1].Prefs.WantRunning, qt.IsTrue)
c.Assert(ipn.Starting, qt.Equals, *nn[2].State)
}
// The last test case is the most common one: restarting when both
// logged in and WantRunning.
t.Logf("\n\nStart5")
notifies.expect(1)
c.Assert(b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey}), qt.IsNil)
{
// NOTE: cc.Shutdown() is correct here, since we didn't call
// b.Shutdown() ourselves.
cc.assertCalls("Shutdown", "unpause", "New", "Login", "unpause")
nn := notifies.drain(1)
cc.assertCalls()
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut, qt.IsFalse)
c.Assert(nn[0].Prefs.WantRunning, qt.IsTrue)
c.Assert(ipn.NoState, qt.Equals, b.State())
}
// Control server accepts our valid key from before.
t.Logf("\n\nLoginFinished5")
notifies.expect(1)
cc.setAuthBlocked(false)
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
// NOTE: No LoginFinished message since no interactive
// login was needed.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
// NOTE: No prefs change this time. WantRunning stays true.
// We were in Starting in the first place, so that doesn't
// change either.
c.Assert(ipn.Starting, qt.Equals, b.State())
}
t.Logf("\n\nExpireKey")
notifies.expect(1)
cc.send(nil, "", false, &netmap.NetworkMap{
Expiry: time.Now().Add(-time.Minute),
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[0].State)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
c.Assert(b.isEngineBlocked(), qt.IsTrue)
}
t.Logf("\n\nExtendKey")
notifies.expect(1)
cc.send(nil, "", false, &netmap.NetworkMap{
Expiry: time.Now().Add(time.Minute),
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
c.Assert(ipn.Starting, qt.Equals, b.State())
c.Assert(b.isEngineBlocked(), qt.IsFalse)
}
notifies.expect(1)
// Fake a DERP connection.
b.setWgengineStatus(&wgengine.Status{DERPs: 1, AsOf: time.Now()}, nil)
{
nn := notifies.drain(1)
cc.assertCalls("unpause")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Running, qt.Equals, *nn[0].State)
c.Assert(ipn.Running, qt.Equals, b.State())
}
}
type testStateStorage struct {
mem mem.Store
written syncs.AtomicBool
}
func (s *testStateStorage) ReadState(id ipn.StateKey) ([]byte, error) {
return s.mem.ReadState(id)
}
func (s *testStateStorage) WriteState(id ipn.StateKey, bs []byte) error {
s.written.Set(true)
return s.mem.WriteState(id, bs)
}
// awaitWrite clears the "I've seen writes" bit, in prep for a future
// call to sawWrite to see if a write arrived.
func (s *testStateStorage) awaitWrite() { s.written.Set(false) }
// sawWrite reports whether there's been a WriteState call since the most
// recent awaitWrite call.
func (s *testStateStorage) sawWrite() bool {
v := s.written.Get()
s.awaitWrite()
return v
}
func TestWGEngineStatusRace(t *testing.T) {
t.Skip("test fails")
c := qt.New(t)
logf := t.Logf
eng, err := wgengine.NewFakeUserspaceEngine(logf, 0)
c.Assert(err, qt.IsNil)
t.Cleanup(eng.Close)
b, err := NewLocalBackend(logf, "logid", new(mem.Store), nil, eng, 0)
c.Assert(err, qt.IsNil)
cc := newMockControl(t)
b.SetControlClientGetterForTesting(func(opts controlclient.Options) (controlclient.Client, error) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.logfActual = opts.Logf
return cc, nil
})
var state ipn.State
b.SetNotifyCallback(func(n ipn.Notify) {
if n.State != nil {
state = *n.State
}
})
wantState := func(want ipn.State) {
c.Assert(want, qt.Equals, state)
}
// Start with the zero value.
wantState(ipn.NoState)
// Start the backend.
err = b.Start(ipn.Options{StateKey: ipn.GlobalDaemonStateKey})
c.Assert(err, qt.IsNil)
wantState(ipn.NeedsLogin)
// Assert that we are logged in and authorized.
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
wantState(ipn.Starting)
// Simulate multiple concurrent callbacks from wgengine.
// Any single callback with DERPS > 0 is enough to transition
// from Starting to Running, at which point we stay there.
// Thus if these callbacks occurred serially, in any order,
// we would end up in state ipn.Running.
// The same should thus be true if these callbacks occur concurrently.
var wg sync.WaitGroup
for i := 0; i < 100; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
n := 0
if i == 0 {
n = 1
}
b.setWgengineStatus(&wgengine.Status{AsOf: time.Now(), DERPs: n}, nil)
}(i)
}
wg.Wait()
wantState(ipn.Running)
}