health, control/controlclient, wgengine: report when router unhealthy

Updates tailscale/corp#1338

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
(cherry picked from commit fd8e070d01)
This commit is contained in:
Brad Fitzpatrick 2021-02-18 08:58:13 -08:00
parent 1aeeeb7e45
commit a660748272
7 changed files with 108 additions and 10 deletions

View File

@ -43,6 +43,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/derp/derphttp from tailscale.com/cmd/tailscale/cli+
tailscale.com/derp/derpmap from tailscale.com/cmd/tailscale/cli
tailscale.com/disco from tailscale.com/derp+
tailscale.com/health from tailscale.com/control/controlclient+
tailscale.com/internal/deepprint from tailscale.com/ipn+
tailscale.com/ipn from tailscale.com/cmd/tailscale/cli
tailscale.com/ipn/ipnstate from tailscale.com/cmd/tailscale/cli+
@ -63,7 +64,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/paths from tailscale.com/cmd/tailscale/cli
tailscale.com/portlist from tailscale.com/ipn
tailscale.com/safesocket from tailscale.com/cmd/tailscale/cli
💣 tailscale.com/syncs from tailscale.com/net/interfaces+
tailscale.com/syncs from tailscale.com/net/interfaces+
tailscale.com/tailcfg from tailscale.com/cmd/tailscale/cli+
W tailscale.com/tsconst from tailscale.com/net/interfaces
tailscale.com/tstime from tailscale.com/wgengine/magicsock
@ -176,7 +177,8 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
hash/maphash from go4.org/mem
html from tailscale.com/ipn/ipnstate
io from bufio+
io/ioutil from crypto/tls+
io/fs from crypto/rand+
io/ioutil from github.com/godbus/dbus/v5+
log from expvar+
math from compress/flate+
math/big from crypto/dsa+

View File

@ -73,6 +73,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/derp from tailscale.com/derp/derphttp+
tailscale.com/derp/derphttp from tailscale.com/net/netcheck+
tailscale.com/disco from tailscale.com/derp+
tailscale.com/health from tailscale.com/control/controlclient+
tailscale.com/internal/deepprint from tailscale.com/ipn+
tailscale.com/ipn from tailscale.com/ipn/ipnserver
tailscale.com/ipn/ipnserver from tailscale.com/cmd/tailscaled
@ -100,7 +101,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/portlist from tailscale.com/ipn
tailscale.com/safesocket from tailscale.com/ipn/ipnserver
tailscale.com/smallzstd from tailscale.com/ipn/ipnserver+
💣 tailscale.com/syncs from tailscale.com/net/interfaces+
tailscale.com/syncs from tailscale.com/net/interfaces+
tailscale.com/tailcfg from tailscale.com/control/controlclient+
W tailscale.com/tsconst from tailscale.com/net/interfaces
tailscale.com/tstime from tailscale.com/wgengine/magicsock
@ -217,10 +218,10 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
hash/crc32 from compress/gzip+
hash/fnv from tailscale.com/wgengine/magicsock
hash/maphash from go4.org/mem
html from html/template+
html/template from net/http/pprof
html from net/http/pprof+
io from bufio+
io/ioutil from crypto/tls+
io/fs from crypto/rand+
io/ioutil from github.com/godbus/dbus/v5+
log from expvar+
math from compress/flate+
math/big from crypto/dsa+
@ -255,8 +256,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
sync/atomic from context+
syscall from crypto/rand+
text/tabwriter from runtime/pprof
text/template from html/template
text/template/parse from html/template+
time from compress/gzip+
unicode from bytes+
unicode/utf16 from encoding/asn1+

View File

@ -18,6 +18,7 @@ import (
"time"
"golang.org/x/oauth2"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/tailcfg"
"tailscale.com/types/empty"
@ -114,6 +115,8 @@ type Client struct {
closed bool
newMapCh chan struct{} // readable when we must restart a map request
unregisterHealthWatch func()
mu sync.Mutex // mutex guards the following fields
statusFunc func(Status) // called to update Client status
@ -169,7 +172,14 @@ func NewNoStart(opts Options) (*Client, error) {
}
c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.unregisterHealthWatch = health.RegisterWatcher(c.onHealthChange)
return c, nil
}
func (c *Client) onHealthChange(key string, err error) {
c.logf("controlclient: restarting map request for %q health change to new state: %v", key, err)
c.cancelMapSafely()
}
// SetPaused controls whether HTTP activity should be paused.
@ -698,6 +708,7 @@ func (c *Client) Shutdown() {
c.logf("client.Shutdown: inSendStatus=%v", inSendStatus)
if !closed {
c.unregisterHealthWatch()
close(c.quit)
c.cancelAuth()
<-c.authDone

View File

@ -34,6 +34,7 @@ import (
"golang.org/x/crypto/nacl/box"
"golang.org/x/oauth2"
"inet.af/netaddr"
"tailscale.com/health"
"tailscale.com/log/logheap"
"tailscale.com/net/dnscache"
"tailscale.com/net/netns"
@ -578,9 +579,16 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*Netw
DebugFlags: c.debugFlags,
OmitPeers: cb == nil,
}
var extraDebugFlags []string
if hostinfo != nil && ipForwardingBroken(hostinfo.RoutableIPs) {
extraDebugFlags = append(extraDebugFlags, "warn-ip-forwarding-off")
}
if health.RouterHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
}
if len(extraDebugFlags) > 0 {
old := request.DebugFlags
request.DebugFlags = append(old[:len(old):len(old)], "warn-ip-forwarding-off")
request.DebugFlags = append(old[:len(old):len(old)], extraDebugFlags...)
}
if c.newDecompressor != nil {
request.Compress = "zstd"

71
health/health.go Normal file
View File

@ -0,0 +1,71 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package health is a registry for other packages to report & check
// overall health status of the node.
package health
import (
"sync"
)
var (
mu sync.Mutex
m = map[string]error{} // error key => err (or nil for no error)
watchers = map[*watchHandle]func(string, error){} // opt func to run if error state changes
)
type watchHandle byte
// RegisterWatcher adds a function that will be called if an
// error changes state either to unhealthy or from unhealthy. It is
// not called on transition from unknown to healthy. It must be non-nil
// and is run in its own goroutine. The returned func unregisters it.
func RegisterWatcher(cb func(errKey string, err error)) (unregister func()) {
mu.Lock()
defer mu.Unlock()
handle := new(watchHandle)
watchers[handle] = cb
return func() {
mu.Lock()
defer mu.Unlock()
delete(watchers, handle)
}
}
// SetRouter sets the state of the wgengine/router.Router.
func SetRouterHealth(err error) { set("router", err) }
// RouterHealth returns the wgengine/router.Router error state.
func RouterHealth() error { return get("router") }
func get(key string) error {
mu.Lock()
defer mu.Unlock()
return m[key]
}
func set(key string, err error) {
mu.Lock()
defer mu.Unlock()
old, ok := m[key]
if !ok && err == nil {
// Initial happy path.
m[key] = nil
return
}
if ok && (old == nil) == (err == nil) {
// No change in overall error status (nil-vs-not), so
// don't run callbacks, but exact error might've
// changed, so note it.
if err != nil {
m[key] = err
}
return
}
m[key] = err
for _, cb := range watchers {
go cb(key, err)
}
}

View File

@ -634,6 +634,10 @@ type MapRequest struct {
// Current DebugFlags values are:
// * "warn-ip-forwarding-off": client is trying to be a subnet
// router but their IP forwarding is broken.
// * "warn-router-unhealthy": client's Router implementation is
// having problems.
// * "v6-overlay": IPv6 development flag to have control send
// v6 node addrs
// * "minimize-netmap": have control minimize the netmap, removing
// peers that are unreachable per ACLS.
DebugFlags []string `json:",omitempty"`

View File

@ -27,6 +27,7 @@ import (
"go4.org/mem"
"inet.af/netaddr"
"tailscale.com/control/controlclient"
"tailscale.com/health"
"tailscale.com/internal/deepprint"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/flowtrack"
@ -998,7 +999,9 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config)
routerCfg.DNS.Nameservers = []netaddr.IP{tsaddr.TailscaleServiceIP()}
}
e.logf("wgengine: Reconfig: configuring router")
if err := e.router.Set(routerCfg); err != nil {
err := e.router.Set(routerCfg)
health.SetRouterHealth(err)
if err != nil {
return err
}
}