diff --git a/health/health.go b/health/health.go index 861305342..0785c66ca 100644 --- a/health/health.go +++ b/health/health.go @@ -38,10 +38,12 @@ type Tracker struct { // mu guards everything in this var block. mu sync.Mutex - sysErr map[Subsystem]error // subsystem => err (or nil for no error) - watchers set.HandleSet[func(Subsystem, error)] // opt func to run if error state changes - warnables set.Set[*Warnable] - timer *time.Timer + warnables []*Warnable // keys ever set + warnableVal map[*Warnable]error + + sysErr map[Subsystem]error // subsystem => err (or nil for no error) + watchers set.HandleSet[func(Subsystem, error)] // opt func to run if error state changes + timer *time.Timer inMapPoll bool inMapPollSince time.Time @@ -87,19 +89,16 @@ const ( SysTKA = Subsystem("tailnet-lock") ) -// NewWarnable returns a new warnable item that the caller can mark -// as health or in warning state. -func (t *Tracker) NewWarnable(opts ...WarnableOpt) *Warnable { +// NewWarnable returns a new warnable item that the caller can mark as health or +// in warning state via Tracker.SetWarnable. +// +// NewWarnable is generally called in init and stored in a package global. It +// can be used by multiple Trackers. +func NewWarnable(opts ...WarnableOpt) *Warnable { w := new(Warnable) for _, o := range opts { o.mod(w) } - t.mu.Lock() - defer t.mu.Unlock() - if t.warnables == nil { - t.warnables = set.Set[*Warnable]{} - } - t.warnables.Add(w) return w } @@ -132,35 +131,25 @@ type warnOptFunc func(*Warnable) func (f warnOptFunc) mod(w *Warnable) { f(w) } // Warnable is a health check item that may or may not be in a bad warning state. -// The caller of NewWarnable is responsible for calling Set to update the state. +// The caller of NewWarnable is responsible for calling Tracker.SetWarnable to update the state. type Warnable struct { debugFlag string // optional MapRequest.DebugFlag to send when unhealthy // If true, this warning is related to configuration of networking stack // on the machine that impacts connectivity. hasConnectivityImpact bool - - isSet atomic.Bool - mu sync.Mutex - err error } // Set updates the Warnable's state. // If non-nil, it's considered unhealthy. -func (w *Warnable) Set(err error) { - w.mu.Lock() - defer w.mu.Unlock() - w.err = err - w.isSet.Store(err != nil) -} - -func (w *Warnable) get() error { - if !w.isSet.Load() { - return nil +func (t *Tracker) SetWarnable(w *Warnable, err error) { + t.mu.Lock() + defer t.mu.Unlock() + l0 := len(t.warnableVal) + mak.Set(&t.warnableVal, w, err) + if len(t.warnableVal) != l0 { + t.warnables = append(t.warnables, w) } - w.mu.Lock() - defer w.mu.Unlock() - return w.err } // AppendWarnableDebugFlags appends to base any health items that are currently in failed @@ -170,11 +159,11 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string { t.mu.Lock() defer t.mu.Unlock() - for w := range t.warnables { + for w, err := range t.warnableVal { if w.debugFlag == "" { continue } - if err := w.get(); err != nil { + if err != nil { ret = append(ret, w.debugFlag) } } @@ -476,18 +465,20 @@ func (t *Tracker) OverallError() error { var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR") -// networkErrorf creates an error that indicates issues with outgoing network +// networkErrorfLocked creates an error that indicates issues with outgoing network // connectivity. Any active warnings related to network connectivity will // automatically be appended to it. -func (t *Tracker) networkErrorf(format string, a ...any) error { +// +// t.mu must be held. +func (t *Tracker) networkErrorfLocked(format string, a ...any) error { errs := []error{ fmt.Errorf(format, a...), } - for w := range t.warnables { + for _, w := range t.warnables { if !w.hasConnectivityImpact { continue } - if err := w.get(); err != nil { + if err := t.warnableVal[w]; err != nil { errs = append(errs, err) } } @@ -521,7 +512,7 @@ func (t *Tracker) overallErrorLocked() error { } const tooIdle = 2*time.Minute + 5*time.Second if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle { - return t.networkErrorf("no map response in %v", d) + return t.networkErrorfLocked("no map response in %v", d) } if !t.derpHomeless { rid := t.derpHomeRegion @@ -529,10 +520,10 @@ func (t *Tracker) overallErrorLocked() error { return errNoDERPHome } if !t.derpRegionConnected[rid] { - return t.networkErrorf("not connected to home DERP region %v", rid) + return t.networkErrorfLocked("not connected to home DERP region %v", rid) } if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle { - return t.networkErrorf("haven't heard from home DERP region %v in %v", rid, d) + return t.networkErrorfLocked("haven't heard from home DERP region %v in %v", rid, d) } } if t.udp4Unbound { @@ -557,8 +548,8 @@ func (t *Tracker) overallErrorLocked() error { } errs = append(errs, fmt.Errorf("%v: %w", sys, err)) } - for w := range t.warnables { - if err := w.get(); err != nil { + for _, w := range t.warnables { + if err := t.warnableVal[w]; err != nil { errs = append(errs, err) } } diff --git a/health/health_test.go b/health/health_test.go index e8d69eee1..a8f322eb4 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -14,9 +14,9 @@ func TestAppendWarnableDebugFlags(t *testing.T) { var tr Tracker for i := range 10 { - w := tr.NewWarnable(WithMapDebugFlag(fmt.Sprint(i))) + w := NewWarnable(WithMapDebugFlag(fmt.Sprint(i))) if i%2 == 0 { - w.Set(errors.New("boom")) + tr.SetWarnable(w, errors.New("boom")) } } diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index dd3118acc..a69c0573e 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -1818,7 +1818,7 @@ func (b *LocalBackend) Start(opts ipn.Options) error { return nil } -var warnInvalidUnsignedNodes = health.Global.NewWarnable() +var warnInvalidUnsignedNodes = health.NewWarnable() // updateFilterLocked updates the packet filter in wgengine based on the // given netMap and user preferences. @@ -1851,10 +1851,10 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P if packetFilterPermitsUnlockedNodes(b.peers, packetFilter) { err := errors.New("server sent invalid packet filter permitting traffic to unlocked nodes; rejecting all packets for safety") - warnInvalidUnsignedNodes.Set(err) + health.Global.SetWarnable(warnInvalidUnsignedNodes, err) packetFilter = nil } else { - warnInvalidUnsignedNodes.Set(nil) + health.Global.SetWarnable(warnInvalidUnsignedNodes, nil) } } if prefs.Valid() { @@ -3044,7 +3044,7 @@ func (b *LocalBackend) isDefaultServerLocked() bool { return prefs.ControlURLOrDefault() == ipn.DefaultControlURL } -var warnExitNodeUsage = health.Global.NewWarnable(health.WithConnectivityImpact()) +var warnExitNodeUsage = health.NewWarnable(health.WithConnectivityImpact()) // updateExitNodeUsageWarning updates a warnable meant to notify users of // configuration issues that could break exit node usage. @@ -3057,7 +3057,7 @@ func updateExitNodeUsageWarning(p ipn.PrefsView, state *interfaces.State) { result = fmt.Errorf("%s: %v, %s", healthmsg.WarnExitNodeUsage, warn, comment) } } - warnExitNodeUsage.Set(result) + health.Global.SetWarnable(warnExitNodeUsage, result) } func (b *LocalBackend) checkExitNodePrefsLocked(p *ipn.Prefs) error { @@ -5675,13 +5675,13 @@ func (b *LocalBackend) sshServerOrInit() (_ SSHServer, err error) { return b.sshServer, nil } -var warnSSHSELinux = health.Global.NewWarnable() +var warnSSHSELinux = health.NewWarnable() func (b *LocalBackend) updateSELinuxHealthWarning() { if hostinfo.IsSELinuxEnforcing() { - warnSSHSELinux.Set(errors.New("SELinux is enabled; Tailscale SSH may not work. See https://tailscale.com/s/ssh-selinux")) + health.Global.SetWarnable(warnSSHSELinux, errors.New("SELinux is enabled; Tailscale SSH may not work. See https://tailscale.com/s/ssh-selinux")) } else { - warnSSHSELinux.Set(nil) + health.Global.SetWarnable(warnSSHSELinux, nil) } } diff --git a/net/dns/direct_linux.go b/net/dns/direct_linux.go index a2556b720..eb6c4c6e2 100644 --- a/net/dns/direct_linux.go +++ b/net/dns/direct_linux.go @@ -58,7 +58,7 @@ func (m *directManager) runFileWatcher() { } } -var warnTrample = health.Global.NewWarnable() +var warnTrample = health.NewWarnable() // checkForFileTrample checks whether /etc/resolv.conf has been trampled // by another program on the system. (e.g. a DHCP client) @@ -78,7 +78,7 @@ func (m *directManager) checkForFileTrample() { return } if bytes.Equal(cur, want) { - warnTrample.Set(nil) + health.Global.SetWarnable(warnTrample, nil) if lastWarn != nil { m.mu.Lock() m.lastWarnContents = nil @@ -101,7 +101,7 @@ func (m *directManager) checkForFileTrample() { show = show[:1024] } m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show) - warnTrample.Set(errors.New("Linux DNS config not ideal. /etc/resolv.conf overwritten. See https://tailscale.com/s/dns-fight")) + health.Global.SetWarnable(warnTrample, errors.New("Linux DNS config not ideal. /etc/resolv.conf overwritten. See https://tailscale.com/s/dns-fight")) } func (m *directManager) closeInotifyOnDone(ctx context.Context, in *gonotify.Inotify) { diff --git a/wgengine/router/ifconfig_windows.go b/wgengine/router/ifconfig_windows.go index 54489fc3c..3e5c37a45 100644 --- a/wgengine/router/ifconfig_windows.go +++ b/wgengine/router/ifconfig_windows.go @@ -235,7 +235,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid) } -var networkCategoryWarning = health.Global.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy")) +var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy")) func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) { var mtu = tstun.DefaultTUNMTU() @@ -268,10 +268,10 @@ func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) { for i := range tries { found, err := setPrivateNetwork(luid) if err != nil { - networkCategoryWarning.Set(fmt.Errorf("set-network-category: %w", err)) + health.Global.SetWarnable(networkCategoryWarning, fmt.Errorf("set-network-category: %w", err)) log.Printf("setPrivateNetwork(try=%d): %v", i, err) } else { - networkCategoryWarning.Set(nil) + health.Global.SetWarnable(networkCategoryWarning, nil) if found { if i > 0 { log.Printf("setPrivateNetwork(try=%d): success", i)