cmd/tailscale/cli: prevent concurrent Start calls in 'up'

Seems to deflake tstest/integration tests. I can't reproduce it
anymore on one of my VMs that was consistently flaking after a dozen
runs before. Now I can run hundreds of times.

Updates #11649
Fixes #7036

Change-Id: I2f7d4ae97500d507bdd78af9e92cd1242e8e44b8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2024-04-16 09:10:50 -07:00 committed by Brad Fitzpatrick
parent 26f9bbc02b
commit 0fba9e7570
3 changed files with 31 additions and 16 deletions

View File

@ -496,11 +496,23 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
running := make(chan bool, 1) // gets value once in state ipn.Running
pumpErr := make(chan error, 1)
var printed bool // whether we've yet printed anything to stdout or stderr
var loginOnce sync.Once
startLoginInteractive := func() { loginOnce.Do(func() { localClient.StartLoginInteractive(ctx) }) }
// localAPIMu should be held while doing mutable LocalAPI calls
// to the backend. In particular, it prevents StartLoginInteractive from
// being called from the watcher goroutine while the Start call from
// the other goroutine is in progress.
// See https://github.com/tailscale/tailscale/issues/7036#issuecomment-2053771466
// TODO(bradfitz): simplify this once #11649 is cleaned up and Start is
// hopefully removed.
var localAPIMu sync.Mutex
startLoginInteractive := sync.OnceFunc(func() {
localAPIMu.Lock()
defer localAPIMu.Unlock()
localClient.StartLoginInteractive(ctx)
})
go func() {
var printed bool // whether we've yet printed anything to stdout or stderr
for {
n, err := watcher.Next()
if err != nil {
@ -574,12 +586,14 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
// Special case: bare "tailscale up" means to just start
// running, if there's ever been a login.
if simpleUp {
localAPIMu.Lock()
_, err := localClient.EditPrefs(ctx, &ipn.MaskedPrefs{
Prefs: ipn.Prefs{
WantRunning: true,
},
WantRunningSet: true,
})
localAPIMu.Unlock()
if err != nil {
return err
}
@ -596,10 +610,13 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if err != nil {
return err
}
if err := localClient.Start(ctx, ipn.Options{
localAPIMu.Lock()
err = localClient.Start(ctx, ipn.Options{
AuthKey: authKey,
UpdatePrefs: prefs,
}); err != nil {
})
localAPIMu.Unlock()
if err != nil {
return err
}
if upArgs.forceReauth {

View File

@ -345,7 +345,6 @@ func TestConfigFileAuthKey(t *testing.T) {
func TestTwoNodes(t *testing.T) {
tstest.Shard(t)
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598")
tstest.Parallel(t)
env := newTestEnv(t)
@ -400,7 +399,6 @@ func TestTwoNodes(t *testing.T) {
// PeersRemoved set) saying that the second node disappeared.
func TestIncrementalMapUpdatePeersRemoved(t *testing.T) {
tstest.Shard(t)
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598")
tstest.Parallel(t)
env := newTestEnv(t)
@ -658,17 +656,14 @@ func TestNoControlConnWhenDown(t *testing.T) {
d2 := n1.StartDaemon()
n1.AwaitResponding()
st := n1.MustStatus()
if got, want := st.BackendState, "Stopped"; got != want {
t.Fatalf("after restart, state = %q; want %q", got, want)
}
n1.AwaitBackendState("Stopped")
ip2 := n1.AwaitIP4()
if ip1 != ip2 {
t.Errorf("IPs different: %q vs %q", ip1, ip2)
}
// The real test: verify our daemon doesn't have an HTTP request open.:
// The real test: verify our daemon doesn't have an HTTP request open.
if n := env.Control.InServeMap(); n != 0 {
t.Errorf("in serve map = %d; want 0", n)
}
@ -1007,7 +1002,6 @@ func newTestEnv(t testing.TB, opts ...testEnvOpt) *testEnv {
if runtime.GOOS == "windows" {
t.Skip("not tested/working on Windows yet")
}
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/7036")
derpMap := RunDERPAndSTUN(t, logger.Discard, "127.0.0.1")
logc := new(LogCatcher)
control := &testcontrol.Server{
@ -1389,6 +1383,10 @@ func (n *testNode) AwaitIP6() netip.Addr {
// AwaitRunning waits for n to reach the IPN state "Running".
func (n *testNode) AwaitRunning() {
n.AwaitBackendState("Running")
}
func (n *testNode) AwaitBackendState(state string) {
t := n.env.t
t.Helper()
if err := tstest.WaitFor(20*time.Second, func() error {
@ -1396,8 +1394,8 @@ func (n *testNode) AwaitRunning() {
if err != nil {
return err
}
if st.BackendState != "Running" {
return fmt.Errorf("in state %q", st.BackendState)
if st.BackendState != state {
return fmt.Errorf("in state %q; want %q", st.BackendState, state)
}
return nil
}); err != nil {

View File

@ -626,7 +626,7 @@ func (localhostListener) ListenPacket(ctx context.Context, network, address stri
}
func TestTwoDevicePing(t *testing.T) {
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/1277")
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/11762")
l, ip := localhostListener{}, netaddr.IPv4(127, 0, 0, 1)
n := &devices{
m1: l,