ipn/ipnlocal: add advertised and primary route metrics
Updates tailscale/corp#22075 Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
parent
cab2e6ea67
commit
77832553e5
|
@ -390,9 +390,18 @@ type updateStatus struct {
|
|||
}
|
||||
|
||||
type metrics struct {
|
||||
// advertisedRoutes is a metric that counts the number of network routes that are advertised by the local node.
|
||||
// advertisedRoutes is a metric that reports the number of network routes that are advertised by the local node.
|
||||
// This informs the user of how many routes are being advertised by the local node, excluding exit routes.
|
||||
advertisedRoutes *usermetric.Gauge
|
||||
|
||||
// approvedRoutes is a metric that reports the number of network routes served by the local node and approved
|
||||
// by the control server.
|
||||
approvedRoutes *usermetric.Gauge
|
||||
|
||||
// primaryRoutes is a metric that reports the number of primary network routes served by the local node.
|
||||
// A route being a primary route implies that the route is currently served by this node, and not by another
|
||||
// subnet router in a high availability configuration.
|
||||
primaryRoutes *usermetric.Gauge
|
||||
}
|
||||
|
||||
// clientGen is a func that creates a control plane client.
|
||||
|
@ -441,6 +450,10 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
|
|||
m := metrics{
|
||||
advertisedRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||
"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)"),
|
||||
approvedRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||
"tailscaled_approved_routes", "Number of approved network routes (e.g. by a subnet router)"),
|
||||
primaryRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||
"tailscaled_primary_routes", "Number of network routes for which this node is a primary router (in high availability configuration)"),
|
||||
}
|
||||
|
||||
b := &LocalBackend{
|
||||
|
@ -5388,6 +5401,11 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
|||
b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
|
||||
if nm == nil {
|
||||
b.nodeByAddr = nil
|
||||
|
||||
// If there is no netmap, the client is going into a "turned off"
|
||||
// state so reset the metrics.
|
||||
b.metrics.approvedRoutes.Set(0)
|
||||
b.metrics.primaryRoutes.Set(0)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -5408,6 +5426,15 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
|||
}
|
||||
if nm.SelfNode.Valid() {
|
||||
addNode(nm.SelfNode)
|
||||
|
||||
var approved float64
|
||||
for _, route := range nm.SelfNode.AllowedIPs().All() {
|
||||
if !views.SliceContains(nm.SelfNode.Addresses(), route) && !tsaddr.IsExitRoute(route) {
|
||||
approved++
|
||||
}
|
||||
}
|
||||
b.metrics.approvedRoutes.Set(approved)
|
||||
b.metrics.primaryRoutes.Set(float64(tsaddr.WithoutExitRoute(nm.SelfNode.PrimaryRoutes()).Len()))
|
||||
}
|
||||
for _, p := range nm.Peers {
|
||||
addNode(p)
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
@ -924,6 +925,32 @@ func TestUserMetrics(t *testing.T) {
|
|||
s1.lb.DebugForceNetmapUpdate()
|
||||
s2.lb.DebugForceNetmapUpdate()
|
||||
|
||||
wantRoutes := float64(2)
|
||||
if runtime.GOOS == "windows" {
|
||||
wantRoutes = 0
|
||||
}
|
||||
|
||||
// Wait for the routes to be propagated to node 1 to ensure
|
||||
// that the metrics are up-to-date.
|
||||
waitForCondition(t, "primary routes available for node1", 90*time.Second, func() bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
status1, err := lc1.Status(ctx)
|
||||
if err != nil {
|
||||
t.Logf("getting status: %s", err)
|
||||
return false
|
||||
}
|
||||
if runtime.GOOS == "windows" {
|
||||
// Windows does not seem to support or report back routes when running in
|
||||
// userspace via tsnet. So, we skip this check on Windows.
|
||||
// TODO(kradalby): Figure out if this is correct.
|
||||
return true
|
||||
}
|
||||
// Wait for the primary routes to reach our desired routes, which is wantRoutes + 1, because
|
||||
// the PrimaryRoutes list will contain an exit node route, which the metric does not count.
|
||||
return status1.Self.PrimaryRoutes != nil && status1.Self.PrimaryRoutes.Len() == int(wantRoutes)+1
|
||||
})
|
||||
|
||||
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancelLc()
|
||||
metrics1, err := lc1.UserMetrics(ctxLc)
|
||||
|
@ -951,11 +978,25 @@ func TestUserMetrics(t *testing.T) {
|
|||
t.Errorf("metrics1, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// The control has approved 2 routes:
|
||||
// - 192.0.2.0/24
|
||||
// - 192.0.5.1/32
|
||||
if got, want := parsedMetrics1["tailscaled_approved_routes"], wantRoutes; got != want {
|
||||
t.Errorf("metrics1, tailscaled_approved_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// Validate the health counter metric against the status of the node
|
||||
if got, want := parsedMetrics1[`tailscaled_health_messages{type="warning"}`], float64(len(status1.Health)); got != want {
|
||||
t.Errorf("metrics1, tailscaled_health_messages: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// The node is the primary subnet router for 2 routes:
|
||||
// - 192.0.2.0/24
|
||||
// - 192.0.5.1/32
|
||||
if got, want := parsedMetrics1["tailscaled_primary_routes"], wantRoutes; got != want {
|
||||
t.Errorf("metrics1, tailscaled_primary_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
metrics2, err := lc2.UserMetrics(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -978,8 +1019,28 @@ func TestUserMetrics(t *testing.T) {
|
|||
t.Errorf("metrics2, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// The control has approved 0 routes
|
||||
if got, want := parsedMetrics2["tailscaled_approved_routes"], 0.0; got != want {
|
||||
t.Errorf("metrics2, tailscaled_approved_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// Validate the health counter metric against the status of the node
|
||||
if got, want := parsedMetrics2[`tailscaled_health_messages{type="warning"}`], float64(len(status2.Health)); got != want {
|
||||
t.Errorf("metrics2, tailscaled_health_messages: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// The node is the primary subnet router for 0 routes
|
||||
if got, want := parsedMetrics2["tailscaled_primary_routes"], 0.0; got != want {
|
||||
t.Errorf("metrics2, tailscaled_primary_routes: got %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForCondition polls f roughly once per second until it returns true or
// waitTime has elapsed, and fails the test with msg if the condition never
// holds.
//
// f is always evaluated at least once, even when waitTime is zero or negative,
// and is evaluated one final time after the last sleep so that a condition
// which became true during that sleep is not reported as a timeout.
func waitForCondition(t *testing.T, msg string, waitTime time.Duration, f func() bool) {
	t.Helper()

	deadline := time.Now().Add(waitTime)
	for {
		if f() {
			return
		}
		// Only give up after a failed check, never before the first one.
		if !time.Now().Before(deadline) {
			break
		}
		time.Sleep(1 * time.Second)
	}

	t.Fatalf("waiting for condition: %s", msg)
}
|
||||
|
|
|
@ -366,6 +366,7 @@ func (s *Server) serveMachine(w http.ResponseWriter, r *http.Request) {
|
|||
func (s *Server) SetSubnetRoutes(nodeKey key.NodePublic, routes []netip.Prefix) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.logf("Setting subnet routes for %s: %v", nodeKey.ShortString(), routes)
|
||||
mak.Set(&s.nodeSubnetRoutes, nodeKey, routes)
|
||||
}
|
||||
|
||||
|
@ -1018,6 +1019,7 @@ func (s *Server) MapResponse(req *tailcfg.MapRequest) (res *tailcfg.MapResponse,
|
|||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
res.Node.PrimaryRoutes = s.nodeSubnetRoutes[nk]
|
||||
res.Node.AllowedIPs = append(res.Node.Addresses, s.nodeSubnetRoutes[nk]...)
|
||||
|
||||
// Consume a PingRequest while protected by mutex if it exists
|
||||
|
|
Loading…
Reference in New Issue