netcheck, controlclient, magicsock: add more metrics
Updates #3307

Change-Id: Ibb33425764a75bde49230632f1b472f923551126
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
3b541c833e
commit
24ea365d48
|
@ -72,6 +72,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
|
|||
tailscale.com/types/persist from tailscale.com/ipn
|
||||
tailscale.com/types/preftype from tailscale.com/cmd/tailscale/cli+
|
||||
tailscale.com/types/structs from tailscale.com/ipn+
|
||||
tailscale.com/util/clientmetric from tailscale.com/net/netcheck
|
||||
tailscale.com/util/dnsname from tailscale.com/cmd/tailscale/cli+
|
||||
W tailscale.com/util/endian from tailscale.com/net/netns
|
||||
tailscale.com/util/groupmember from tailscale.com/cmd/tailscale/cli
|
||||
|
|
|
@ -46,6 +46,7 @@ import (
|
|||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/types/opt"
|
||||
"tailscale.com/types/persist"
|
||||
"tailscale.com/util/clientmetric"
|
||||
"tailscale.com/util/systemd"
|
||||
"tailscale.com/wgengine/monitor"
|
||||
)
|
||||
|
@ -558,6 +559,15 @@ const pollTimeout = 120 * time.Second
|
|||
|
||||
// cb nil means to omit peers.
|
||||
func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netmap.NetworkMap)) error {
|
||||
metricMapRequests.Add(1)
|
||||
metricMapRequestsActive.Add(1)
|
||||
defer metricMapRequestsActive.Add(-1)
|
||||
if maxPolls == -1 {
|
||||
metricMapRequestsPoll.Add(1)
|
||||
} else {
|
||||
metricMapRequestsLite.Add(1)
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
persist := c.persist
|
||||
serverURL := c.serverURL
|
||||
|
@ -747,11 +757,14 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
|
|||
return err
|
||||
}
|
||||
|
||||
metricMapResponseMessages.Add(1)
|
||||
|
||||
if allowStream {
|
||||
health.GotStreamedMapResponse()
|
||||
}
|
||||
|
||||
if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) {
|
||||
metricMapResponsePings.Add(1)
|
||||
go answerPing(c.logf, c.httpc, pr)
|
||||
}
|
||||
|
||||
|
@ -768,9 +781,15 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
|
|||
return ctx.Err()
|
||||
}
|
||||
if resp.KeepAlive {
|
||||
metricMapResponseKeepAlives.Add(1)
|
||||
continue
|
||||
}
|
||||
|
||||
metricMapResponseMap.Add(1)
|
||||
if i > 0 {
|
||||
metricMapResponseMapDelta.Add(1)
|
||||
}
|
||||
|
||||
hasDebug := resp.Debug != nil
|
||||
// Be conservative here: if Debug is not present, set DisableUPnP to false.
|
||||
controlknobs.SetDisableUPnP(hasDebug && resp.Debug.DisableUPnP.EqualBool(true))
|
||||
|
@ -1181,7 +1200,13 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, timeoutReset chan<-
|
|||
|
||||
// SetDNS sends the SetDNSRequest request to the control plane server,
|
||||
// requesting a DNS record be created or updated.
|
||||
func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) error {
|
||||
func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err error) {
|
||||
metricSetDNS.Add(1)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
metricSetDNSError.Add(1)
|
||||
}
|
||||
}()
|
||||
c.mu.Lock()
|
||||
serverKey := c.serverKey
|
||||
c.mu.Unlock()
|
||||
|
@ -1281,3 +1306,20 @@ func postPingResult(now time.Time, logf logger.Logf, c *http.Client, pr *tailcfg
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
metricMapRequestsActive = clientmetric.NewGauge("controlclient_map_requests_active")
|
||||
|
||||
metricMapRequests = clientmetric.NewCounter("controlclient_map_requests")
|
||||
metricMapRequestsLite = clientmetric.NewCounter("controlclient_map_requests_lite")
|
||||
metricMapRequestsPoll = clientmetric.NewCounter("controlclient_map_requests_poll")
|
||||
|
||||
metricMapResponseMessages = clientmetric.NewCounter("controlclient_map_response_message") // any message type
|
||||
metricMapResponsePings = clientmetric.NewCounter("controlclient_map_response_ping")
|
||||
metricMapResponseKeepAlives = clientmetric.NewCounter("controlclient_map_response_keepalive")
|
||||
metricMapResponseMap = clientmetric.NewCounter("controlclient_map_response_map") // any non-keepalive map response
|
||||
metricMapResponseMapDelta = clientmetric.NewCounter("controlclient_map_response_map_delta") // 2nd+ non-keepalive map response
|
||||
|
||||
metricSetDNS = clientmetric.NewCounter("controlclient_setdns")
|
||||
metricSetDNSError = clientmetric.NewCounter("controlclient_setdns_error")
|
||||
)
|
||||
|
|
|
@ -34,6 +34,7 @@ import (
|
|||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/types/opt"
|
||||
"tailscale.com/util/clientmetric"
|
||||
)
|
||||
|
||||
// Debugging and experimentation tweakables.
|
||||
|
@ -232,6 +233,12 @@ func (c *Client) MakeNextReportFull() {
|
|||
func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) {
|
||||
c.vlogf("received STUN packet from %s", src)
|
||||
|
||||
if src.IP().Is4() {
|
||||
metricSTUNRecv4.Add(1)
|
||||
} else if src.IP().Is6() {
|
||||
metricSTUNRecv6.Add(1)
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
if c.handleHairSTUNLocked(pkt, src) {
|
||||
c.mu.Unlock()
|
||||
|
@ -737,7 +744,13 @@ func (c *Client) udpBindAddr() string {
|
|||
// GetReport gets a report.
|
||||
//
|
||||
// It may not be called concurrently with itself.
|
||||
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) {
|
||||
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, reterr error) {
|
||||
defer func() {
|
||||
if reterr != nil {
|
||||
metricNumGetReportError.Add(1)
|
||||
}
|
||||
}()
|
||||
metricNumGetReport.Add(1)
|
||||
// Mask user context with ours that we guarantee to cancel so
|
||||
// we can depend on it being closed in goroutines later.
|
||||
// (User ctx might be context.Background, etc)
|
||||
|
@ -769,6 +782,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
|
|||
last = nil // causes makeProbePlan below to do a full (initial) plan
|
||||
c.nextFull = false
|
||||
c.lastFull = now
|
||||
metricNumGetReportFull.Add(1)
|
||||
}
|
||||
rs.incremental = last != nil
|
||||
c.mu.Unlock()
|
||||
|
@ -983,6 +997,7 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
|
|||
}
|
||||
|
||||
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) {
|
||||
metricHTTPSend.Add(1)
|
||||
var result httpstat.Result
|
||||
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
|
||||
defer cancel()
|
||||
|
@ -1217,6 +1232,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
|
|||
|
||||
switch probe.proto {
|
||||
case probeIPv4:
|
||||
metricSTUNSend4.Add(1)
|
||||
n, err := rs.pc4.WriteTo(req, addr)
|
||||
if n == len(req) && err == nil {
|
||||
rs.mu.Lock()
|
||||
|
@ -1224,6 +1240,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
|
|||
rs.mu.Unlock()
|
||||
}
|
||||
case probeIPv6:
|
||||
metricSTUNSend6.Add(1)
|
||||
n, err := rs.pc6.WriteTo(req, addr)
|
||||
if n == len(req) && err == nil {
|
||||
rs.mu.Lock()
|
||||
|
@ -1322,3 +1339,15 @@ func conciseOptBool(b opt.Bool, trueVal string) string {
|
|||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var (
|
||||
metricNumGetReport = clientmetric.NewCounter("netcheck_report")
|
||||
metricNumGetReportFull = clientmetric.NewCounter("netcheck_report_full")
|
||||
metricNumGetReportError = clientmetric.NewCounter("netcheck_report_error")
|
||||
|
||||
metricSTUNSend4 = clientmetric.NewCounter("netcheck_stun_send_ipv4")
|
||||
metricSTUNSend6 = clientmetric.NewCounter("netcheck_stun_send_ipv6")
|
||||
metricSTUNRecv4 = clientmetric.NewCounter("netcheck_stun_recv_ipv4")
|
||||
metricSTUNRecv6 = clientmetric.NewCounter("netcheck_stun_recv_ipv6")
|
||||
metricHTTPSend = clientmetric.NewCounter("netcheck_https_measure")
|
||||
)
|
||||
|
|
|
@ -967,6 +967,9 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
|
|||
// No change.
|
||||
return true
|
||||
}
|
||||
if c.myDerp != 0 && derpNum != 0 {
|
||||
metricDERPHomeChange.Add(1)
|
||||
}
|
||||
c.myDerp = derpNum
|
||||
health.SetMagicSockDERPHome(derpNum)
|
||||
|
||||
|
@ -1616,6 +1619,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
|
|||
err := dc.Send(wr.pubKey, wr.b)
|
||||
if err != nil {
|
||||
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
|
||||
metricSendDERPError.Add(1)
|
||||
} else {
|
||||
metricSendDERP.Add(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4054,6 +4060,8 @@ var (
|
|||
metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue")
|
||||
metricSendUDP = clientmetric.NewCounter("magicsock_send_udp")
|
||||
metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error")
|
||||
metricSendDERP = clientmetric.NewCounter("magicsock_send_derp")
|
||||
metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error")
|
||||
|
||||
// Data packets (non-disco)
|
||||
metricSendData = clientmetric.NewCounter("magicsock_send_data")
|
||||
|
@ -4079,4 +4087,8 @@ var (
|
|||
metricRecvDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe")
|
||||
metricRecvDiscoCallMeMaybeBadNode = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_node")
|
||||
metricRecvDiscoCallMeMaybeBadDisco = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_disco")
|
||||
|
||||
// metricDERPHomeChange is how many times our DERP home region ID has
|
||||
// changed from non-zero to a different non-zero.
|
||||
metricDERPHomeChange = clientmetric.NewCounter("derp_home_change")
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue