netcheck, controlclient, magicsock: add more metrics

Updates #3307

Change-Id: Ibb33425764a75bde49230632f1b472f923551126
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2021-11-16 08:34:25 -08:00 committed by Brad Fitzpatrick
parent 3b541c833e
commit 24ea365d48
4 changed files with 86 additions and 2 deletions

View File

@ -72,6 +72,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/types/persist from tailscale.com/ipn
tailscale.com/types/preftype from tailscale.com/cmd/tailscale/cli+
tailscale.com/types/structs from tailscale.com/ipn+
tailscale.com/util/clientmetric from tailscale.com/net/netcheck
tailscale.com/util/dnsname from tailscale.com/cmd/tailscale/cli+
W tailscale.com/util/endian from tailscale.com/net/netns
tailscale.com/util/groupmember from tailscale.com/cmd/tailscale/cli

View File

@ -46,6 +46,7 @@ import (
"tailscale.com/types/netmap"
"tailscale.com/types/opt"
"tailscale.com/types/persist"
"tailscale.com/util/clientmetric"
"tailscale.com/util/systemd"
"tailscale.com/wgengine/monitor"
)
@ -558,6 +559,15 @@ const pollTimeout = 120 * time.Second
// cb nil means to omit peers.
func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netmap.NetworkMap)) error {
metricMapRequests.Add(1)
metricMapRequestsActive.Add(1)
defer metricMapRequestsActive.Add(-1)
if maxPolls == -1 {
metricMapRequestsPoll.Add(1)
} else {
metricMapRequestsLite.Add(1)
}
c.mu.Lock()
persist := c.persist
serverURL := c.serverURL
@ -747,11 +757,14 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
return err
}
metricMapResponseMessages.Add(1)
if allowStream {
health.GotStreamedMapResponse()
}
if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) {
metricMapResponsePings.Add(1)
go answerPing(c.logf, c.httpc, pr)
}
@ -768,9 +781,15 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
return ctx.Err()
}
if resp.KeepAlive {
metricMapResponseKeepAlives.Add(1)
continue
}
metricMapResponseMap.Add(1)
if i > 0 {
metricMapResponseMapDelta.Add(1)
}
hasDebug := resp.Debug != nil
// Be conservative: if Debug is absent, treat DisableUPnP as false.
controlknobs.SetDisableUPnP(hasDebug && resp.Debug.DisableUPnP.EqualBool(true))
@ -1181,7 +1200,13 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, timeoutReset chan<-
// SetDNS sends the SetDNSRequest request to the control plane server,
// requesting a DNS record be created or updated.
func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) error {
func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err error) {
metricSetDNS.Add(1)
defer func() {
if err != nil {
metricSetDNSError.Add(1)
}
}()
c.mu.Lock()
serverKey := c.serverKey
c.mu.Unlock()
@ -1281,3 +1306,20 @@ func postPingResult(now time.Time, logf logger.Logf, c *http.Client, pr *tailcfg
}
return nil
}
// Client metrics for the control client's map request/response handling
// and SetDNS calls.
var (
// metricMapRequestsActive is a gauge of sendMapRequest calls currently
// in flight: incremented on entry and decremented when the call returns.
metricMapRequestsActive = clientmetric.NewGauge("controlclient_map_requests_active")

// metricMapRequests counts every sendMapRequest call.
metricMapRequests = clientmetric.NewCounter("controlclient_map_requests")
// metricMapRequestsLite counts sendMapRequest calls whose maxPolls is
// anything other than -1.
metricMapRequestsLite = clientmetric.NewCounter("controlclient_map_requests_lite")
// metricMapRequestsPoll counts sendMapRequest calls with maxPolls == -1.
metricMapRequestsPoll = clientmetric.NewCounter("controlclient_map_requests_poll")

// metricMapResponseMessages counts each map response message handled
// in sendMapRequest, whatever its kind.
metricMapResponseMessages = clientmetric.NewCounter("controlclient_map_response_message") // any message type
// metricMapResponsePings counts responses carrying a PingRequest that
// isUniquePingRequest accepted (i.e. ones we go on to answer).
metricMapResponsePings = clientmetric.NewCounter("controlclient_map_response_ping")
// metricMapResponseKeepAlives counts responses with KeepAlive set.
metricMapResponseKeepAlives = clientmetric.NewCounter("controlclient_map_response_keepalive")
// metricMapResponseMap counts responses that are not keep-alives.
metricMapResponseMap = clientmetric.NewCounter("controlclient_map_response_map") // any non-keepalive map response
// metricMapResponseMapDelta counts non-keepalive responses seen when
// the response loop index is greater than zero.
metricMapResponseMapDelta = clientmetric.NewCounter("controlclient_map_response_map_delta") // 2nd+ non-keepalive map response

// metricSetDNS counts every SetDNS call.
metricSetDNS = clientmetric.NewCounter("controlclient_setdns")
// metricSetDNSError counts SetDNS calls that returned a non-nil error.
metricSetDNSError = clientmetric.NewCounter("controlclient_setdns_error")
)

View File

@ -34,6 +34,7 @@ import (
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
"tailscale.com/types/opt"
"tailscale.com/util/clientmetric"
)
// Debugging and experimentation tweakables.
@ -232,6 +233,12 @@ func (c *Client) MakeNextReportFull() {
func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) {
c.vlogf("received STUN packet from %s", src)
if src.IP().Is4() {
metricSTUNRecv4.Add(1)
} else if src.IP().Is6() {
metricSTUNRecv6.Add(1)
}
c.mu.Lock()
if c.handleHairSTUNLocked(pkt, src) {
c.mu.Unlock()
@ -737,7 +744,13 @@ func (c *Client) udpBindAddr() string {
// GetReport gets a report.
//
// It may not be called concurrently with itself.
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) {
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, reterr error) {
defer func() {
if reterr != nil {
metricNumGetReportError.Add(1)
}
}()
metricNumGetReport.Add(1)
// Mask user context with ours that we guarantee to cancel so
// we can depend on it being closed in goroutines later.
// (User ctx might be context.Background, etc)
@ -769,6 +782,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
last = nil // causes makeProbePlan below to do a full (initial) plan
c.nextFull = false
c.lastFull = now
metricNumGetReportFull.Add(1)
}
rs.incremental = last != nil
c.mu.Unlock()
@ -983,6 +997,7 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
}
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) {
metricHTTPSend.Add(1)
var result httpstat.Result
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
defer cancel()
@ -1217,6 +1232,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
switch probe.proto {
case probeIPv4:
metricSTUNSend4.Add(1)
n, err := rs.pc4.WriteTo(req, addr)
if n == len(req) && err == nil {
rs.mu.Lock()
@ -1224,6 +1240,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
rs.mu.Unlock()
}
case probeIPv6:
metricSTUNSend6.Add(1)
n, err := rs.pc6.WriteTo(req, addr)
if n == len(req) && err == nil {
rs.mu.Lock()
@ -1322,3 +1339,15 @@ func conciseOptBool(b opt.Bool, trueVal string) string {
}
return ""
}
// Client metrics for netcheck reports and the probes they send.
var (
// metricNumGetReport counts every GetReport call.
metricNumGetReport = clientmetric.NewCounter("netcheck_report")
// metricNumGetReportFull counts GetReport calls that take the branch
// forcing a full (initial) probe plan.
metricNumGetReportFull = clientmetric.NewCounter("netcheck_report_full")
// metricNumGetReportError counts GetReport calls that returned a
// non-nil error.
metricNumGetReportError = clientmetric.NewCounter("netcheck_report_error")

// metricSTUNSend4 and metricSTUNSend6 count STUN probe send attempts in
// runProbe; they are bumped before the write, so failed writes count too.
metricSTUNSend4 = clientmetric.NewCounter("netcheck_stun_send_ipv4")
metricSTUNSend6 = clientmetric.NewCounter("netcheck_stun_send_ipv6")
// metricSTUNRecv4 and metricSTUNRecv6 count packets handed to
// ReceiveSTUNPacket, split by the address family of the source IP.
metricSTUNRecv4 = clientmetric.NewCounter("netcheck_stun_recv_ipv4")
metricSTUNRecv6 = clientmetric.NewCounter("netcheck_stun_recv_ipv6")

// metricHTTPSend counts calls to measureHTTPSLatency.
metricHTTPSend = clientmetric.NewCounter("netcheck_https_measure")
)

View File

@ -967,6 +967,9 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
// No change.
return true
}
if c.myDerp != 0 && derpNum != 0 {
metricDERPHomeChange.Add(1)
}
c.myDerp = derpNum
health.SetMagicSockDERPHome(derpNum)
@ -1616,6 +1619,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
err := dc.Send(wr.pubKey, wr.b)
if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1)
} else {
metricSendDERP.Add(1)
}
}
}
@ -4054,6 +4060,8 @@ var (
metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue")
metricSendUDP = clientmetric.NewCounter("magicsock_send_udp")
metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error")
metricSendDERP = clientmetric.NewCounter("magicsock_send_derp")
metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error")
// Data packets (non-disco)
metricSendData = clientmetric.NewCounter("magicsock_send_data")
@ -4079,4 +4087,8 @@ var (
metricRecvDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe")
metricRecvDiscoCallMeMaybeBadNode = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_node")
metricRecvDiscoCallMeMaybeBadDisco = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_disco")
// metricDERPHomeChange is how many times our DERP home region ID has
// changed from non-zero to a different non-zero.
metricDERPHomeChange = clientmetric.NewCounter("derp_home_change")
)