From 6e55d8f6a1cb93b521acf386b2a4f9ca769af05d Mon Sep 17 00:00:00 2001 From: Andrea Gottardo Date: Tue, 25 Jun 2024 22:02:38 -0700 Subject: [PATCH] health: add warming-up warnable (#12553) --- health/health.go | 35 ++++++++++++++++++++++++++++++++++- health/health_test.go | 11 +++++++---- health/state.go | 9 ++++++++- health/warnings.go | 15 +++++++++++++++ 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/health/health.go b/health/health.go index c1740b1f1..437c3465d 100644 --- a/health/health.go +++ b/health/health.go @@ -92,7 +92,8 @@ type Tracker struct { lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest ipnState string ipnWantRunning bool - anyInterfaceUp opt.Bool // empty means unknown (assume true) + ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true + anyInterfaceUp opt.Bool // empty means unknown (assume true) udp4Unbound bool controlHealth []string lastLoginErr error @@ -705,7 +706,29 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) { t.mu.Lock() defer t.mu.Unlock() t.ipnState = state + prevWantRunning := t.ipnWantRunning t.ipnWantRunning = wantRunning + + if state == "Running" { + // Any time we are told the backend is Running (control+DERP are connected), the Warnable + // should be set to healthy, no matter if 5 seconds have passed or not. + t.setHealthyLocked(warmingUpWarnable) + } else if wantRunning && !prevWantRunning && t.ipnWantRunningLastTrue.IsZero() { + // The first time we see wantRunning=true and it used to be false, it means the user requested + // the backend to start. We store this timestamp and use it to silence some warnings that are + // expected during startup. + t.ipnWantRunningLastTrue = time.Now() + t.setUnhealthyLocked(warmingUpWarnable, nil) + time.AfterFunc(warmingUpWarnableDuration, func() { + t.mu.Lock() + t.updateWarmingUpWarnableLocked() + t.mu.Unlock() + }) + } else if !wantRunning { + // Reset the timer when the user decides to stop the backend. + t.ipnWantRunningLastTrue = time.Time{} + } + t.selfCheckLocked() } @@ -858,6 +881,8 @@ var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR") // updateBuiltinWarnablesLocked performs a number of checks on the state of the backend, // and adds/removes Warnings from the Tracker as needed. func (t *Tracker) updateBuiltinWarnablesLocked() { + t.updateWarmingUpWarnableLocked() + if t.checkForUpdates { if cv := t.latestVersion; cv != nil && !cv.RunningLatest && cv.LatestVersion != "" { if cv.UrgentSecurityUpdate { @@ -1037,6 +1062,14 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { } } +// updateWarmingUpWarnableLocked ensures the warmingUpWarnable is healthy if wantRunning has been set to true +// for more than warmingUpWarnableDuration. +func (t *Tracker) updateWarmingUpWarnableLocked() { + if !t.ipnWantRunningLastTrue.IsZero() && time.Now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) { + t.setHealthyLocked(warmingUpWarnable) + } +} + // ReceiveFuncStats tracks the calls made to a wireguard-go receive func. type ReceiveFuncStats struct { // name is the name of the receive func. diff --git a/health/health_test.go b/health/health_test.go index 7bfe87242..9ef49b642 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -6,6 +6,7 @@ package health import ( "fmt" "reflect" + "slices" "testing" "time" ) @@ -199,15 +200,17 @@ func TestCheckDependsOnAppearsInUnhealthyState(t *testing.T) { if !ok { t.Fatalf("Expected an UnhealthyState for w1, got nothing") } - if len(us1.DependsOn) != 0 { - t.Fatalf("Expected no DependsOn in the unhealthy state, got: %v", us1.DependsOn) + wantDependsOn := []WarnableCode{warmingUpWarnable.Code} + if !reflect.DeepEqual(us1.DependsOn, wantDependsOn) { + t.Fatalf("Expected DependsOn = %v in the unhealthy state, got: %v", wantDependsOn, us1.DependsOn) } ht.SetUnhealthy(w2, Args{ArgError: "w2 is also unhealthy now"}) us2, ok := ht.CurrentState().Warnings[w2.Code] if !ok { t.Fatalf("Expected an UnhealthyState for w2, got nothing") } - if !reflect.DeepEqual(us2.DependsOn, []WarnableCode{w1.Code}) { - t.Fatalf("Expected DependsOn = [w1.Code] in the unhealthy state, got: %v", us2.DependsOn) + wantDependsOn = slices.Concat([]WarnableCode{w1.Code}, wantDependsOn) + if !reflect.DeepEqual(us2.DependsOn, wantDependsOn) { + t.Fatalf("Expected DependsOn = %v in the unhealthy state, got: %v", wantDependsOn, us2.DependsOn) } } diff --git a/health/state.go b/health/state.go index bd7338381..3f7ea64f5 100644 --- a/health/state.go +++ b/health/state.go @@ -41,11 +41,18 @@ func (w *Warnable) unhealthyState(ws *warningState) *UnhealthyState { text = w.Text(Args{}) } - dependsOnWarnableCodes := make([]WarnableCode, len(w.DependsOn)) + dependsOnWarnableCodes := make([]WarnableCode, len(w.DependsOn), len(w.DependsOn)+1) for i, d := range w.DependsOn { dependsOnWarnableCodes[i] = d.Code } + if w != warmingUpWarnable { + // Here we tell the frontend that all Warnables depend on warmingUpWarnable. GUIs will silence all warnings until all + // their dependencies are healthy. This is a special case to prevent the GUI from showing a bunch of warnings when + // the backend is still warming up. + dependsOnWarnableCodes = append(dependsOnWarnableCodes, warmingUpWarnable.Code) + } + return &UnhealthyState{ WarnableCode: w.Code, Severity: w.Severity, diff --git a/health/warnings.go b/health/warnings.go index 2db9135a8..4e0743aa9 100644 --- a/health/warnings.go +++ b/health/warnings.go @@ -5,6 +5,7 @@ package health import ( "fmt" + "time" ) /** @@ -212,3 +213,17 @@ var controlHealthWarnable = Register(&Warnable{ return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError]) }, }) + +// warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user +// has changed ipnWantRunning to true from false. +const warmingUpWarnableDuration = 5 * time.Second + +// warmingUpWarnable is a Warnable that is reported by the backend when it is starting up, for a maximum time of +// warmingUpWarnableDuration. The GUIs use the presence of this Warnable to prevent showing any other warnings until +// the backend is fully started. +var warmingUpWarnable = Register(&Warnable{ + Code: "warming-up", + Title: "Tailscale is starting", + Severity: SeverityLow, + Text: StaticMessage("Tailscale is starting. Please wait."), +})