cmd/k8s-operator: add metrics to track usage

Updates #502

Signed-off-by: Maisem Ali <maisem@tailscale.com>
This commit is contained in:
Maisem Ali 2023-08-30 09:49:11 -07:00 committed by Maisem Ali
parent 0a74d46568
commit 306b85b9a3
4 changed files with 75 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import (
"context" "context"
"fmt" "fmt"
"strings" "strings"
"sync"
"go.uber.org/zap" "go.uber.org/zap"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
@ -21,6 +22,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/ipn" "tailscale.com/ipn"
"tailscale.com/types/opt" "tailscale.com/types/opt"
"tailscale.com/util/clientmetric"
"tailscale.com/util/set"
) )
type IngressReconciler struct { type IngressReconciler struct {
@ -29,8 +32,20 @@ type IngressReconciler struct {
recorder record.EventRecorder recorder record.EventRecorder
ssr *tailscaleSTSReconciler ssr *tailscaleSTSReconciler
logger *zap.SugaredLogger logger *zap.SugaredLogger
mu sync.Mutex // protects following
// managedIngresses is a set of all ingress resources that we're currently
// managing. This is only used for metrics.
managedIngresses set.Slice[types.UID]
} }
var (
// gaugeIngressResources tracks the number of ingress resources that we're
// currently managing.
//
// It is set to IngressReconciler.managedIngresses.Len() after each Add in
// maybeProvision and each Remove in maybeCleanup, while
// IngressReconciler.mu is held.
gaugeIngressResources = clientmetric.NewGauge("k8s_ingress_resources")
)
func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
logger := a.logger.With("ingress-ns", req.Namespace, "ingress-name", req.Name) logger := a.logger.With("ingress-ns", req.Namespace, "ingress-name", req.Name)
logger.Debugf("starting reconcile") logger.Debugf("starting reconcile")
@ -57,6 +72,10 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
ix := slices.Index(ing.Finalizers, FinalizerName) ix := slices.Index(ing.Finalizers, FinalizerName)
if ix < 0 { if ix < 0 {
logger.Debugf("no finalizer, nothing to do") logger.Debugf("no finalizer, nothing to do")
a.mu.Lock()
defer a.mu.Unlock()
a.managedIngresses.Remove(ing.UID)
gaugeIngressResources.Set(int64(a.managedIngresses.Len()))
return nil return nil
} }
@ -77,6 +96,10 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
// cleanup removes the tailscale finalizer, which will make all future // cleanup removes the tailscale finalizer, which will make all future
// reconciles exit early. // reconciles exit early.
logger.Infof("unexposed ingress from tailnet") logger.Infof("unexposed ingress from tailnet")
a.mu.Lock()
defer a.mu.Unlock()
a.managedIngresses.Remove(ing.UID)
gaugeIngressResources.Set(int64(a.managedIngresses.Len()))
return nil return nil
} }
@ -97,6 +120,10 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return fmt.Errorf("failed to add finalizer: %w", err) return fmt.Errorf("failed to add finalizer: %w", err)
} }
} }
a.mu.Lock()
a.managedIngresses.Add(ing.UID)
gaugeIngressResources.Set(int64(a.managedIngresses.Len()))
a.mu.Unlock()
// magic443 is a fake hostname that we can use to tell containerboot to swap // magic443 is a fake hostname that we can use to tell containerboot to swap
// out with the real hostname once it's known. // out with the real hostname once it's known.

View File

@ -25,6 +25,7 @@ import (
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tsnet" "tailscale.com/tsnet"
"tailscale.com/types/logger" "tailscale.com/types/logger"
"tailscale.com/util/clientmetric"
"tailscale.com/util/set" "tailscale.com/util/set"
) )
@ -42,6 +43,8 @@ func addWhoIsToRequest(r *http.Request, who *apitype.WhoIsResponse) *http.Reques
return r.WithContext(context.WithValue(r.Context(), whoIsKey{}, who)) return r.WithContext(context.WithValue(r.Context(), whoIsKey{}, who))
} }
// counterNumRequestsProxied counts the requests handled by
// authProxy.ServeHTTP that got past caller authentication. It is incremented
// just before the request is passed on to h.rp, so requests rejected with
// "failed to authenticate caller" are not counted.
var counterNumRequestsProxied = clientmetric.NewCounter("k8s_auth_proxy_requests_proxied")
// launchAuthProxy launches the auth proxy, which is a small HTTP server that // launchAuthProxy launches the auth proxy, which is a small HTTP server that
// authenticates requests using the Tailscale LocalAPI and then proxies them to // authenticates requests using the Tailscale LocalAPI and then proxies them to
// the kube-apiserver. // the kube-apiserver.
@ -84,6 +87,7 @@ func (h *authProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
http.Error(w, "failed to authenticate caller", http.StatusInternalServerError) http.Error(w, "failed to authenticate caller", http.StatusInternalServerError)
return return
} }
counterNumRequestsProxied.Add(1)
h.rp.ServeHTTP(w, addWhoIsToRequest(r, who)) h.rp.ServeHTTP(w, addWhoIsToRequest(r, who))
} }

View File

@ -10,13 +10,17 @@ import (
"fmt" "fmt"
"net/netip" "net/netip"
"strings" "strings"
"sync"
"go.uber.org/zap" "go.uber.org/zap"
"golang.org/x/exp/slices" "golang.org/x/exp/slices"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/util/clientmetric"
"tailscale.com/util/set"
) )
type ServiceReconciler struct { type ServiceReconciler struct {
@ -24,8 +28,26 @@ type ServiceReconciler struct {
ssr *tailscaleSTSReconciler ssr *tailscaleSTSReconciler
logger *zap.SugaredLogger logger *zap.SugaredLogger
isDefaultLoadBalancer bool isDefaultLoadBalancer bool
mu sync.Mutex // protects following
// managedIngressProxies is a set of all ingress proxies that we're
// currently managing. This is only used for metrics.
managedIngressProxies set.Slice[types.UID]
// managedEgressProxies is a set of all egress proxies that we're currently
// managing. This is only used for metrics.
managedEgressProxies set.Slice[types.UID]
} }
var (
// gaugeEgressProxies tracks the number of egress proxies that we're
// currently managing.
//
// It is set to ServiceReconciler.managedEgressProxies.Len() while
// ServiceReconciler.mu is held: after an Add in maybeProvision (for a
// service that is not exposed but has the tailnet target annotation) and
// after a Remove in maybeCleanup.
gaugeEgressProxies = clientmetric.NewGauge("k8s_egress_proxies")
// gaugeIngressProxies tracks the number of ingress proxies that we're
// currently managing.
//
// It is set to ServiceReconciler.managedIngressProxies.Len() while
// ServiceReconciler.mu is held: after an Add in maybeProvision (for a
// service for which shouldExpose reports true) and after a Remove in
// maybeCleanup.
gaugeIngressProxies = clientmetric.NewGauge("k8s_ingress_proxies")
)
func childResourceLabels(name, ns, typ string) map[string]string { func childResourceLabels(name, ns, typ string) map[string]string {
// You might wonder why we're using owner references, since they seem to be // You might wonder why we're using owner references, since they seem to be
// built for exactly this. Unfortunately, Kubernetes does not support // built for exactly this. Unfortunately, Kubernetes does not support
@ -71,6 +93,12 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
ix := slices.Index(svc.Finalizers, FinalizerName) ix := slices.Index(svc.Finalizers, FinalizerName)
if ix < 0 { if ix < 0 {
logger.Debugf("no finalizer, nothing to do") logger.Debugf("no finalizer, nothing to do")
a.mu.Lock()
defer a.mu.Unlock()
a.managedIngressProxies.Remove(svc.UID)
a.managedEgressProxies.Remove(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len()))
return nil return nil
} }
@ -91,6 +119,13 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
// cleanup removes the tailscale finalizer, which will make all future // cleanup removes the tailscale finalizer, which will make all future
// reconciles exit early. // reconciles exit early.
logger.Infof("unexposed service from tailnet") logger.Infof("unexposed service from tailnet")
a.mu.Lock()
defer a.mu.Unlock()
a.managedIngressProxies.Remove(svc.UID)
a.managedEgressProxies.Remove(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len()))
return nil return nil
} }
@ -130,11 +165,17 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
ChildResourceLabels: crl, ChildResourceLabels: crl,
} }
a.mu.Lock()
if a.shouldExpose(svc) { if a.shouldExpose(svc) {
sts.ClusterTargetIP = svc.Spec.ClusterIP sts.ClusterTargetIP = svc.Spec.ClusterIP
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
} else if a.hasTailnetTargetAnnotation(svc) { } else if a.hasTailnetTargetAnnotation(svc) {
sts.TailnetTargetIP = svc.Annotations[AnnotationTailnetTargetIP] sts.TailnetTargetIP = svc.Annotations[AnnotationTailnetTargetIP]
a.managedEgressProxies.Add(svc.UID)
gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len()))
} }
a.mu.Unlock()
var hsvc *corev1.Service var hsvc *corev1.Service
if hsvc, err = a.ssr.Provision(ctx, logger, sts); err != nil { if hsvc, err = a.ssr.Provision(ctx, logger, sts); err != nil {

View File

@ -20,6 +20,9 @@ type Slice[T comparable] struct {
// The returned value is only valid until ss is modified again. // The returned value is only valid until ss is modified again.
func (ss *Slice[T]) Slice() views.Slice[T] { return views.SliceOf(ss.slice) } func (ss *Slice[T]) Slice() views.Slice[T] { return views.SliceOf(ss.slice) }
// Len returns the number of elements in the set.
// The cost is O(1): it is the length of the underlying slice.
func (ss *Slice[T]) Len() int { return len(ss.slice) }
// Contains reports whether v is in the set. // Contains reports whether v is in the set.
// The amortized cost is O(1). // The amortized cost is O(1).
func (ss *Slice[T]) Contains(v T) bool { func (ss *Slice[T]) Contains(v T) bool {