control/controlclient: stop restarting map polls on health change
At some point we started restarting map polls on health change, but we don't remember why. Maybe it was a desperate workaround for something. I'm not sure it ever worked. Rather than have a haunted graveyard, remove it. In its place, though, and somewhat as a safety backup, send those updates over the HTTP/2 noise channel if we have one open. Then if there was a reason that a map poll restart would help we could do it server-side. But mostly we can gather error stats and show machine-level health info for debugging. Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
6d04184325
commit
fb4e23506f
|
@ -114,19 +114,11 @@ func NewNoStart(opts Options) (*Auto, error) {
|
|||
}
|
||||
c.authCtx, c.authCancel = context.WithCancel(context.Background())
|
||||
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
|
||||
c.unregisterHealthWatch = health.RegisterWatcher(c.onHealthChange)
|
||||
c.unregisterHealthWatch = health.RegisterWatcher(direct.ReportHealthChange)
|
||||
return c, nil
|
||||
|
||||
}
|
||||
|
||||
func (c *Auto) onHealthChange(sys health.Subsystem, err error) {
|
||||
if sys == health.SysOverall {
|
||||
return
|
||||
}
|
||||
c.logf("controlclient: restarting map request for %q health change to new state: %v", sys, err)
|
||||
c.cancelMapSafely()
|
||||
}
|
||||
|
||||
// SetPaused controls whether HTTP activity should be paused.
|
||||
//
|
||||
// The client can be paused and unpaused repeatedly, unlike Start and Shutdown, which can only be used once.
|
||||
|
|
|
@ -1586,6 +1586,38 @@ func postPingResult(start time.Time, logf logger.Logf, c *http.Client, pr *tailc
|
|||
return nil
|
||||
}
|
||||
|
||||
// ReportHealthChange reports to the control plane a change to this node's
|
||||
// health.
|
||||
func (c *Direct) ReportHealthChange(sys health.Subsystem, sysErr error) {
|
||||
if sys == health.SysOverall {
|
||||
// We don't report these. These include things like the network is down
|
||||
// (in which case we can't report anyway) or the user wanted things
|
||||
// stopped, as opposed to the more unexpected failure types in the other
|
||||
// subsystems.
|
||||
return
|
||||
}
|
||||
np, err := c.getNoiseClient()
|
||||
if err != nil {
|
||||
// Don't report errors to control if the server doesn't support noise.
|
||||
return
|
||||
}
|
||||
req := &tailcfg.HealthChangeRequest{
|
||||
Subsys: string(sys),
|
||||
}
|
||||
if sysErr != nil {
|
||||
req.Error = sysErr.Error()
|
||||
}
|
||||
|
||||
// Best effort, no logging:
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
res, err := np.post(ctx, "/machine/update-health", req)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
res.Body.Close()
|
||||
}
|
||||
|
||||
var (
|
||||
metricMapRequestsActive = clientmetric.NewGauge("controlclient_map_requests_active")
|
||||
|
||||
|
|
|
@ -1684,6 +1684,13 @@ type SetDNSRequest struct {
|
|||
// SetDNSResponse is the response to a SetDNSRequest.
|
||||
type SetDNSResponse struct{}
|
||||
|
||||
// HealthChangeRequest is the JSON request body type used to report
|
||||
// node health changes to https://<control>/machine/<mkey hex>/update-health.
|
||||
type HealthChangeRequest struct {
|
||||
Subsys string // a health.Subsystem value in string form
|
||||
Error string // or empty if cleared
|
||||
}
|
||||
|
||||
// SSHPolicy is the policy for how to handle incoming SSH connections
|
||||
// over Tailscale.
|
||||
type SSHPolicy struct {
|
||||
|
|
Loading…
Reference in New Issue