cmd/tailscale,controlclient,ipnlocal: fix 'up', deflake tests more

The CLI's "up" is kinda chaotic and LocalBackend.Start is kinda
chaotic and they both need to be redone/deleted (respectively), but
this fixes some buggy behavior meanwhile. We were previously calling
StartLoginInteractive (to start the controlclient's RegisterRequest)
redundantly in some cases, causing test flakes depending on timing and
up's weird state machine.

We only need to call StartLoginInteractive in the client if Start itself
doesn't. But Start doesn't tell us that. So cheat a bit and a put the
information about whether there's a current NodeKey in the ipn.Status.
It used to be accessible over LocalAPI via GetPrefs as a private key but
we removed that for security. But a bool is fine.

So then only call StartLoginInteractive if that bool is false and don't
do it in the WatchIPNBus loop.

Fixes #12028
Updates #12042

Change-Id: I0923c3f704a9d6afd825a858eb9a63ca7c1df294
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2024-05-06 21:33:37 -07:00 committed by Brad Fitzpatrick
parent e5ef35857f
commit e968b0ecd7
6 changed files with 85 additions and 20 deletions

View File

@ -410,6 +410,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
// printAuthURL reports whether we should print out the
// provided auth URL from an IPN notify.
printAuthURL := func(url string) bool {
if url == "" {
// Probably unnecessary but we used to have a bug where tailscaled
// could send an empty URL over the IPN bus. ~Harmless to keep.
return false
}
if upArgs.authKeyOrFile != "" {
// Issue 1755: when using an authkey, don't
// show an authURL that might still be pending
@ -527,8 +532,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if err != nil {
return err
}
if upArgs.forceReauth {
localClient.StartLoginInteractive(ctx)
if upArgs.forceReauth || !st.HaveNodeKey {
err := localClient.StartLoginInteractive(ctx)
if err != nil {
return err
}
}
}
@ -540,6 +548,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
go func() {
var printed bool // whether we've yet printed anything to stdout or stderr
var lastURLPrinted string
for {
n, err := watcher.Next()
if err != nil {
@ -552,8 +562,6 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
}
if s := n.State; s != nil {
switch *s {
case ipn.NeedsLogin:
localClient.StartLoginInteractive(ctx)
case ipn.NeedsMachineAuth:
printed = true
if env.upArgs.json {
@ -576,12 +584,17 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
cancelWatch()
}
}
if url := n.BrowseToURL; url != nil && printAuthURL(*url) {
if url := n.BrowseToURL; url != nil {
authURL := *url
if !printAuthURL(authURL) || authURL == lastURLPrinted {
continue
}
printed = true
lastURLPrinted = authURL
if upArgs.json {
js := &upOutputJSON{AuthURL: *url, BackendState: st.BackendState}
js := &upOutputJSON{AuthURL: authURL, BackendState: st.BackendState}
q, err := qrcode.New(*url, qrcode.Medium)
q, err := qrcode.New(authURL, qrcode.Medium)
if err == nil {
png, err := q.PNG(128)
if err == nil {
@ -596,9 +609,9 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
outln(string(data))
}
} else {
fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", *url)
fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", authURL)
if upArgs.qr {
q, err := qrcode.New(*url, qrcode.Medium)
q, err := qrcode.New(authURL, qrcode.Medium)
if err != nil {
log.Printf("QR code error: %v", err)
} else {

View File

@ -348,9 +348,14 @@ func (c *Auto) authRoutine() {
continue
}
if url != "" {
// goal.url ought to be empty here.
// However, not all control servers get this right,
// and logging about it here just generates noise.
// goal.url ought to be empty here. However, not all control servers
// get this right, and logging about it here just generates noise.
//
// TODO(bradfitz): I don't follow that comment. Our own testcontrol
// used by tstest/integration hits this path, in fact.
if c.direct.panicOnUse {
panic("tainted client")
}
c.mu.Lock()
c.urlToVisit = url
c.loginGoal = &LoginGoal{
@ -615,6 +620,9 @@ func (c *Auto) Login(t *tailcfg.Oauth2Token, flags LoginFlags) {
if c.closed {
return
}
if c.direct != nil && c.direct.panicOnUse {
panic("tainted client")
}
c.wantLoggedIn = true
c.loginGoal = &LoginGoal{
token: t,
@ -632,6 +640,9 @@ func (c *Auto) Logout(ctx context.Context) error {
c.wantLoggedIn = false
c.loginGoal = nil
closed := c.closed
if c.direct != nil && c.direct.panicOnUse {
panic("tainted client")
}
c.mu.Unlock()
if closed {

View File

@ -80,6 +80,7 @@ type Direct struct {
onClientVersion func(*tailcfg.ClientVersion) // or nil
onControlTime func(time.Time) // or nil
onTailnetDefaultAutoUpdate func(bool) // or nil
panicOnUse bool // if true, panic if client is used (for testing)
dialPlan ControlDialPlanner // can be nil
@ -310,6 +311,9 @@ func NewDirect(opts Options) (*Direct, error) {
}
c.serverNoiseKey = key.NewMachine().Public() // prevent early error before hitting test client
}
if strings.Contains(opts.ServerURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
c.panicOnUse = true
}
return c, nil
}
@ -399,7 +403,7 @@ func (c *Direct) TryLogout(ctx context.Context) error {
func (c *Direct) TryLogin(ctx context.Context, t *tailcfg.Oauth2Token, flags LoginFlags) (url string, err error) {
if strings.Contains(c.serverURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
panic("[unexpected] controlclient: TryLogin called on " + c.serverURL)
panic(fmt.Sprintf("[unexpected] controlclient: TryLogin called on %s; tainted=%v", c.serverURL, c.panicOnUse))
}
c.logf("[v1] direct.TryLogin(token=%v, flags=%v)", t != nil, flags)
return c.doLoginOrRegen(ctx, loginOpt{Token: t, Flags: flags})
@ -462,6 +466,9 @@ func (c *Direct) hostInfoLocked() *tailcfg.Hostinfo {
}
func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, newURL string, nks tkatype.MarshaledSignature, err error) {
if c.panicOnUse {
panic("tainted client")
}
c.mu.Lock()
persist := c.persist.AsStruct()
tryingNewKey := c.tryingNewKey
@ -835,6 +842,9 @@ const watchdogTimeout = 120 * time.Second
//
// If nu is nil, OmitPeers will be set to true.
func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu NetmapUpdater) error {
if c.panicOnUse {
panic("tainted client")
}
if isStreaming && nu == nil {
panic("cb must be non-nil if isStreaming is true")
}
@ -1531,6 +1541,9 @@ func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err er
}
func (c *Direct) DoNoiseRequest(req *http.Request) (*http.Response, error) {
if c.panicOnUse {
panic("tainted client")
}
nc, err := c.getNoiseClient()
if err != nil {
return nil, err
@ -1627,6 +1640,9 @@ func (c *Direct) ReportHealthChange(sys health.Subsystem, sysErr error) {
if !ok {
return
}
if c.panicOnUse {
panic("tainted client")
}
req := &tailcfg.HealthChangeRequest{
Subsys: string(sys),
NodeKey: nodeKey,

View File

@ -790,6 +790,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) {
s.ClientVersion = b.lastClientVersion
}
s.Health = b.health.AppendWarnings(s.Health)
s.HaveNodeKey = b.hasNodeKeyLocked()
// TODO(bradfitz): move this health check into a health.Warnable
// and remove from here.
@ -4605,6 +4606,9 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client {
return nil
}
b.authURL = ""
b.authURLSticky = ""
// When we clear the control client, stop any outstanding netmap expiry
// timer; synthesizing a new netmap while we don't have a control
// client will break things.

View File

@ -41,6 +41,9 @@ type Status struct {
// "Starting", "Running".
BackendState string
// HaveNodeKey is whether the current profile has a node key configured.
HaveNodeKey bool `json:",omitempty"`
AuthURL string // current URL provided by control to authorize client
TailscaleIPs []netip.Addr // Tailscale IP(s) assigned to this node
Self *PeerStatus

View File

@ -259,7 +259,7 @@ func TestStateSavedOnStart(t *testing.T) {
n1.MustDown()
// And change the hostname to something:
if err := n1.Tailscale("up", "--login-server="+n1.env.ControlServer.URL, "--hostname=foo").Run(); err != nil {
if err := n1.Tailscale("up", "--login-server="+n1.env.controlURL(), "--hostname=foo").Run(); err != nil {
t.Fatalf("up: %v", err)
}
@ -289,9 +289,9 @@ func TestOneNodeUpAuth(t *testing.T) {
st := n1.MustStatus()
t.Logf("Status: %s", st.BackendState)
t.Logf("Running up --login-server=%s ...", env.ControlServer.URL)
t.Logf("Running up --login-server=%s ...", env.controlURL())
cmd := n1.Tailscale("up", "--login-server="+env.ControlServer.URL)
cmd := n1.Tailscale("up", "--login-server="+env.controlURL())
var authCountAtomic int32
cmd.Stdout = &authURLParserWriter{fn: func(urlStr string) error {
if env.Control.CompleteAuth(urlStr) {
@ -1069,7 +1069,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
// Should not be changed even if sent "true" later.
sendAndCheckDefault(t, n, true, false)
// But can be changed explicitly by the user.
if out, err := n.Tailscale("set", "--auto-update").CombinedOutput(); err != nil {
if out, err := n.TailscaleForOutput("set", "--auto-update").CombinedOutput(); err != nil {
t.Fatalf("failed to enable auto-update on node: %v\noutput: %s", err, out)
}
sendAndCheckDefault(t, n, false, true)
@ -1083,7 +1083,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
// Should not be changed even if sent "false" later.
sendAndCheckDefault(t, n, false, true)
// But can be changed explicitly by the user.
if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil {
if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
}
sendAndCheckDefault(t, n, true, false)
@ -1093,7 +1093,7 @@ func TestAutoUpdateDefaults(t *testing.T) {
desc: "user-sets-first",
run: func(t *testing.T, n *testNode) {
// User sets auto-update first, before receiving defaults.
if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil {
if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
}
// Defaults sent from control should be ignored.
@ -1135,6 +1135,16 @@ type testEnv struct {
TrafficTrapServer *httptest.Server
}
// controlURL returns e.ControlServer.URL, panicking if it's the empty string,
// which it should never be in tests.
func (e *testEnv) controlURL() string {
s := e.ControlServer.URL
if s == "" {
panic("control server not set")
}
return s
}
type testEnvOpt interface {
modifyTestEnv(*testEnv)
}
@ -1183,6 +1193,7 @@ func newTestEnv(t testing.TB, opts ...testEnvOpt) *testEnv {
e.TrafficTrapServer.Close()
e.ControlServer.Close()
})
t.Logf("control URL: %v", e.controlURL())
return e
}
@ -1445,7 +1456,7 @@ func (n *testNode) MustUp(extraArgs ...string) {
t.Helper()
args := []string{
"up",
"--login-server=" + n.env.ControlServer.URL,
"--login-server=" + n.env.controlURL(),
"--reset",
}
args = append(args, extraArgs...)
@ -1585,6 +1596,13 @@ func (n *testNode) AwaitNeedsLogin() {
}
}
func (n *testNode) TailscaleForOutput(arg ...string) *exec.Cmd {
cmd := n.Tailscale(arg...)
cmd.Stdout = nil
cmd.Stderr = nil
return cmd
}
// Tailscale returns a command that runs the tailscale CLI with the provided arguments.
// It does not start the process.
func (n *testNode) Tailscale(arg ...string) *exec.Cmd {