ipn/ipnlocal: add advertised and primary route metrics
Updates tailscale/corp#22075

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
parent
cab2e6ea67
commit
77832553e5
|
@ -390,9 +390,18 @@ type updateStatus struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type metrics struct {
|
type metrics struct {
|
||||||
// advertisedRoutes is a metric that counts the number of network routes that are advertised by the local node.
|
// advertisedRoutes is a metric that reports the number of network routes that are advertised by the local node.
|
||||||
// This informs the user of how many routes are being advertised by the local node, excluding exit routes.
|
// This informs the user of how many routes are being advertised by the local node, excluding exit routes.
|
||||||
advertisedRoutes *usermetric.Gauge
|
advertisedRoutes *usermetric.Gauge
|
||||||
|
|
||||||
|
// approvedRoutes is a metric that reports the number of network routes served by the local node and approved
|
||||||
|
// by the control server.
|
||||||
|
approvedRoutes *usermetric.Gauge
|
||||||
|
|
||||||
|
// primaryRoutes is a metric that reports the number of primary network routes served by the local node.
|
||||||
|
// A route being a primary route implies that the route is currently served by this node, and not by another
|
||||||
|
// subnet router in a high availability configuration.
|
||||||
|
primaryRoutes *usermetric.Gauge
|
||||||
}
|
}
|
||||||
|
|
||||||
// clientGen is a func that creates a control plane client.
|
// clientGen is a func that creates a control plane client.
|
||||||
|
@ -441,6 +450,10 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
|
||||||
m := metrics{
|
m := metrics{
|
||||||
advertisedRoutes: sys.UserMetricsRegistry().NewGauge(
|
advertisedRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||||
"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)"),
|
"tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)"),
|
||||||
|
approvedRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||||
|
"tailscaled_approved_routes", "Number of approved network routes (e.g. by a subnet router)"),
|
||||||
|
primaryRoutes: sys.UserMetricsRegistry().NewGauge(
|
||||||
|
"tailscaled_primary_routes", "Number of network routes for which this node is a primary router (in high availability configuration)"),
|
||||||
}
|
}
|
||||||
|
|
||||||
b := &LocalBackend{
|
b := &LocalBackend{
|
||||||
|
@ -5388,6 +5401,11 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
||||||
b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
|
b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
|
||||||
if nm == nil {
|
if nm == nil {
|
||||||
b.nodeByAddr = nil
|
b.nodeByAddr = nil
|
||||||
|
|
||||||
|
// If there is no netmap, the client is going into a "turned off"
|
||||||
|
// state so reset the metrics.
|
||||||
|
b.metrics.approvedRoutes.Set(0)
|
||||||
|
b.metrics.primaryRoutes.Set(0)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5408,6 +5426,15 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
||||||
}
|
}
|
||||||
if nm.SelfNode.Valid() {
|
if nm.SelfNode.Valid() {
|
||||||
addNode(nm.SelfNode)
|
addNode(nm.SelfNode)
|
||||||
|
|
||||||
|
var approved float64
|
||||||
|
for _, route := range nm.SelfNode.AllowedIPs().All() {
|
||||||
|
if !views.SliceContains(nm.SelfNode.Addresses(), route) && !tsaddr.IsExitRoute(route) {
|
||||||
|
approved++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.metrics.approvedRoutes.Set(approved)
|
||||||
|
b.metrics.primaryRoutes.Set(float64(tsaddr.WithoutExitRoute(nm.SelfNode.PrimaryRoutes()).Len()))
|
||||||
}
|
}
|
||||||
for _, p := range nm.Peers {
|
for _, p := range nm.Peers {
|
||||||
addNode(p)
|
addNode(p)
|
||||||
|
|
|
@ -26,6 +26,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
@ -924,6 +925,32 @@ func TestUserMetrics(t *testing.T) {
|
||||||
s1.lb.DebugForceNetmapUpdate()
|
s1.lb.DebugForceNetmapUpdate()
|
||||||
s2.lb.DebugForceNetmapUpdate()
|
s2.lb.DebugForceNetmapUpdate()
|
||||||
|
|
||||||
|
wantRoutes := float64(2)
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
wantRoutes = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the routes to be propagated to node 1 to ensure
|
||||||
|
// that the metrics are up-to-date.
|
||||||
|
waitForCondition(t, "primary routes available for node1", 90*time.Second, func() bool {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
status1, err := lc1.Status(ctx)
|
||||||
|
if err != nil {
|
||||||
|
t.Logf("getting status: %s", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
// Windows does not seem to support or report back routes when running in
|
||||||
|
// userspace via tsnet. So, we skip this check on Windows.
|
||||||
|
// TODO(kradalby): Figure out if this is correct.
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Wait for the primary routes to reach our desired routes, which is wantRoutes + 1, because
|
||||||
|
// the PrimaryRoutes list will contain an exit node route, which the metric does not count.
|
||||||
|
return status1.Self.PrimaryRoutes != nil && status1.Self.PrimaryRoutes.Len() == int(wantRoutes)+1
|
||||||
|
})
|
||||||
|
|
||||||
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
|
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
defer cancelLc()
|
defer cancelLc()
|
||||||
metrics1, err := lc1.UserMetrics(ctxLc)
|
metrics1, err := lc1.UserMetrics(ctxLc)
|
||||||
|
@ -951,11 +978,25 @@ func TestUserMetrics(t *testing.T) {
|
||||||
t.Errorf("metrics1, tailscaled_advertised_routes: got %v, want %v", got, want)
|
t.Errorf("metrics1, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The control has approved 2 routes:
|
||||||
|
// - 192.0.2.0/24
|
||||||
|
// - 192.0.5.1/32
|
||||||
|
if got, want := parsedMetrics1["tailscaled_approved_routes"], wantRoutes; got != want {
|
||||||
|
t.Errorf("metrics1, tailscaled_approved_routes: got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
// Validate the health counter metric against the status of the node
|
// Validate the health counter metric against the status of the node
|
||||||
if got, want := parsedMetrics1[`tailscaled_health_messages{type="warning"}`], float64(len(status1.Health)); got != want {
|
if got, want := parsedMetrics1[`tailscaled_health_messages{type="warning"}`], float64(len(status1.Health)); got != want {
|
||||||
t.Errorf("metrics1, tailscaled_health_messages: got %v, want %v", got, want)
|
t.Errorf("metrics1, tailscaled_health_messages: got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The node is the primary subnet router for 2 routes:
|
||||||
|
// - 192.0.2.0/24
|
||||||
|
// - 192.0.5.1/32
|
||||||
|
if got, want := parsedMetrics1["tailscaled_primary_routes"], wantRoutes; got != want {
|
||||||
|
t.Errorf("metrics1, tailscaled_primary_routes: got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
metrics2, err := lc2.UserMetrics(ctx)
|
metrics2, err := lc2.UserMetrics(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
@ -978,8 +1019,28 @@ func TestUserMetrics(t *testing.T) {
|
||||||
t.Errorf("metrics2, tailscaled_advertised_routes: got %v, want %v", got, want)
|
t.Errorf("metrics2, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The control has approved 0 routes
|
||||||
|
if got, want := parsedMetrics2["tailscaled_approved_routes"], 0.0; got != want {
|
||||||
|
t.Errorf("metrics2, tailscaled_approved_routes: got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
// Validate the health counter metric against the status of the node
|
// Validate the health counter metric against the status of the node
|
||||||
if got, want := parsedMetrics2[`tailscaled_health_messages{type="warning"}`], float64(len(status2.Health)); got != want {
|
if got, want := parsedMetrics2[`tailscaled_health_messages{type="warning"}`], float64(len(status2.Health)); got != want {
|
||||||
t.Errorf("metrics2, tailscaled_health_messages: got %v, want %v", got, want)
|
t.Errorf("metrics2, tailscaled_health_messages: got %v, want %v", got, want)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The node is the primary subnet router for 0 routes
|
||||||
|
if got, want := parsedMetrics2["tailscaled_primary_routes"], 0.0; got != want {
|
||||||
|
t.Errorf("metrics2, tailscaled_primary_routes: got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitForCondition(t *testing.T, msg string, waitTime time.Duration, f func() bool) {
|
||||||
|
t.Helper()
|
||||||
|
for deadline := time.Now().Add(waitTime); time.Now().Before(deadline); time.Sleep(1 * time.Second) {
|
||||||
|
if f() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Fatalf("waiting for condition: %s", msg)
|
||||||
}
|
}
|
||||||
|
|
|
@ -366,6 +366,7 @@ func (s *Server) serveMachine(w http.ResponseWriter, r *http.Request) {
|
||||||
func (s *Server) SetSubnetRoutes(nodeKey key.NodePublic, routes []netip.Prefix) {
|
func (s *Server) SetSubnetRoutes(nodeKey key.NodePublic, routes []netip.Prefix) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
s.logf("Setting subnet routes for %s: %v", nodeKey.ShortString(), routes)
|
||||||
mak.Set(&s.nodeSubnetRoutes, nodeKey, routes)
|
mak.Set(&s.nodeSubnetRoutes, nodeKey, routes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1018,6 +1019,7 @@ func (s *Server) MapResponse(req *tailcfg.MapRequest) (res *tailcfg.MapResponse,
|
||||||
|
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
res.Node.PrimaryRoutes = s.nodeSubnetRoutes[nk]
|
||||||
res.Node.AllowedIPs = append(res.Node.Addresses, s.nodeSubnetRoutes[nk]...)
|
res.Node.AllowedIPs = append(res.Node.Addresses, s.nodeSubnetRoutes[nk]...)
|
||||||
|
|
||||||
// Consume a PingRequest while protected by mutex if it exists
|
// Consume a PingRequest while protected by mutex if it exists
|
||||||
|
|
Loading…
Reference in New Issue