From 89ee6bbdaef4216fd1664f3c91bd5244e24bd252 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Wed, 9 Oct 2024 18:23:40 +0100 Subject: [PATCH] cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services for ProxyGroup (#13746) cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services Set a readiness condition on ExternalName Services that define a tailnet target to route cluster traffic to via a ProxyGroup's proxies. The condition is set to true if at least one proxy is currently set up to route. Updates tailscale/tailscale#13406 Signed-off-by: Irbe Krumina --- cmd/k8s-operator/egress-services-readiness.go | 179 ++++++++++++++++++ .../egress-services-readiness_test.go | 169 +++++++++++++++++ cmd/k8s-operator/egress-services.go | 18 +- cmd/k8s-operator/operator.go | 47 ++++- k8s-operator/apis/v1alpha1/types_connector.go | 21 +- 5 files changed, 420 insertions(+), 14 deletions(-) create mode 100644 cmd/k8s-operator/egress-services-readiness.go create mode 100644 cmd/k8s-operator/egress-services-readiness_test.go diff --git a/cmd/k8s-operator/egress-services-readiness.go b/cmd/k8s-operator/egress-services-readiness.go new file mode 100644 index 000000000..f6991145f --- /dev/null +++ b/cmd/k8s-operator/egress-services-readiness.go @@ -0,0 +1,179 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "errors" + "fmt" + "strings" + + "go.uber.org/zap" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + tsoperator "tailscale.com/k8s-operator" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/tstime" +) + +const ( + reasonReadinessCheckFailed = "ReadinessCheckFailed" + reasonClusterResourcesNotReady = "ClusterResourcesNotReady" + reasonNoProxies = "NoProxiesConfigured" + reasonNotReady = "NotReadyToRouteTraffic" + reasonReady = "ReadyToRouteTraffic" + reasonPartiallyReady = "PartiallyReadyToRouteTraffic" + msgReadyToRouteTemplate = "%d out of %d replicas are ready to route traffic" +) + +type egressSvcsReadinessReconciler struct { + client.Client + logger *zap.SugaredLogger + clock tstime.Clock + tsNamespace string +} + +// Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the +// EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to +// route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress +// service to determine how many replicas are currently able to route traffic. +func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) { + l := esrr.logger.With("Service", req.NamespacedName) + defer l.Info("reconcile finished") + + svc := new(corev1.Service) + if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) { + l.Info("Service not found") + return res, nil + } else if err != nil { + return res, fmt.Errorf("failed to get Service: %w", err) + } + var ( + reason, msg string + st metav1.ConditionStatus = metav1.ConditionUnknown + ) + oldStatus := svc.Status.DeepCopy() + defer func() { + tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l) + if !apiequality.Semantic.DeepEqual(oldStatus, svc.Status) { + err = errors.Join(err, esrr.Status().Update(ctx, svc)) + } + }() + + crl := egressSvcChildResourceLabels(svc) + eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl) + if err != nil { + err = fmt.Errorf("error getting EndpointSlice: %w", err) + reason = reasonReadinessCheckFailed + msg = err.Error() + return res, err + } + if eps == nil { + l.Infof("EndpointSlice for Service does not yet exist, waiting...") + reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady + st = metav1.ConditionFalse + return res, nil + } + pg := &tsapi.ProxyGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: svc.Annotations[AnnotationProxyGroup], + }, + } + err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg) + if apierrors.IsNotFound(err) { + l.Infof("ProxyGroup for Service does not exist, waiting...") + reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady + st = metav1.ConditionFalse + return res, nil + } + if err != nil { + err = fmt.Errorf("error retrieving ProxyGroup: %w", err) + reason = reasonReadinessCheckFailed + msg = err.Error() + return res, err + } + if !tsoperator.ProxyGroupIsReady(pg) { + l.Infof("ProxyGroup for Service is not ready, waiting...") + reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady + st = metav1.ConditionFalse + return res, nil + } + + replicas := pgReplicas(pg) + if replicas == 0 { + l.Infof("ProxyGroup replicas set to 0") + reason, msg = reasonNoProxies, reasonNoProxies + st = metav1.ConditionFalse + return res, nil + } + podLabels := pgLabels(pg.Name, nil) + var readyReplicas int32 + for i := range replicas { + podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i) + pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels) + if err != nil { + err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err) + reason = reasonReadinessCheckFailed + msg = err.Error() + return res, err + } + if pod == nil { + l.Infof("[unexpected] ProxyGroup is ready, but replica %d was not found", i) + reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady + return res, nil + } + l.Infof("looking at Pod with IPs %v", pod.Status.PodIPs) + ready := false + for _, ep := range eps.Endpoints { + l.Infof("looking at endpoint with addresses %v", ep.Addresses) + if endpointReadyForPod(&ep, pod, l) { + l.Infof("endpoint is ready for Pod") + ready = true + break + } + } + if ready { + readyReplicas++ + } + } + msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas) + if readyReplicas == 0 { + reason = reasonNotReady + st = metav1.ConditionFalse + return res, nil + } + st = metav1.ConditionTrue + if readyReplicas < replicas { + reason = reasonPartiallyReady + } else { + reason = reasonReady + } + return res, nil +} + +// endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic. +// Endpoint must not be nil. +func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool { + podIP, err := podIPv4(pod) + if err != nil { + l.Infof("[unexpected] error retrieving Pod's IPv4 address: %v", err) + return false + } + // Currently we only ever set a single address on and Endpoint and nothing else is meant to modify this. + if len(ep.Addresses) != 1 { + return false + } + return strings.EqualFold(ep.Addresses[0], podIP) && + *ep.Conditions.Ready && + *ep.Conditions.Serving && + !*ep.Conditions.Terminating +} diff --git a/cmd/k8s-operator/egress-services-readiness_test.go b/cmd/k8s-operator/egress-services-readiness_test.go new file mode 100644 index 000000000..052eb1a49 --- /dev/null +++ b/cmd/k8s-operator/egress-services-readiness_test.go @@ -0,0 +1,169 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "fmt" + "testing" + + "github.com/AlekSi/pointer" + "go.uber.org/zap" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + tsoperator "tailscale.com/k8s-operator" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/tstest" + "tailscale.com/tstime" +) + +func TestEgressServiceReadiness(t *testing.T) { + // We need to pass a ProxyGroup object to WithStatusSubresource because of some quirks in how the fake client + // works. Without this code further down would not be able to update ProxyGroup status. + fc := fake.NewClientBuilder(). + WithScheme(tsapi.GlobalScheme). + WithStatusSubresource(&tsapi.ProxyGroup{}). + Build() + zl, _ := zap.NewDevelopment() + cl := tstest.NewClock(tstest.ClockOpts{}) + rec := &egressSvcsReadinessReconciler{ + tsNamespace: "operator-ns", + Client: fc, + logger: zl.Sugar(), + clock: cl, + } + tailnetFQDN := "my-app.tailnetxyz.ts.net" + egressSvc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-app", + Namespace: "dev", + Annotations: map[string]string{ + AnnotationProxyGroup: "dev", + AnnotationTailnetTargetFQDN: tailnetFQDN, + }, + }, + } + fakeClusterIPSvc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "my-app", Namespace: "operator-ns"}} + l := egressSvcEpsLabels(egressSvc, fakeClusterIPSvc) + eps := &discoveryv1.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-app", + Namespace: "operator-ns", + Labels: l, + }, + AddressType: discoveryv1.AddressTypeIPv4, + } + pg := &tsapi.ProxyGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + } + mustCreate(t, fc, egressSvc) + setClusterNotReady(egressSvc, cl, zl.Sugar()) + t.Run("endpointslice_does_not_exist", func(t *testing.T) { + expectReconciled(t, rec, "dev", "my-app") + expectEqual(t, fc, egressSvc, nil) // not ready + }) + t.Run("proxy_group_does_not_exist", func(t *testing.T) { + mustCreate(t, fc, eps) + expectReconciled(t, rec, "dev", "my-app") + expectEqual(t, fc, egressSvc, nil) // still not ready + }) + t.Run("proxy_group_not_ready", func(t *testing.T) { + mustCreate(t, fc, pg) + expectReconciled(t, rec, "dev", "my-app") + expectEqual(t, fc, egressSvc, nil) // still not ready + }) + t.Run("no_ready_replicas", func(t *testing.T) { + setPGReady(pg, cl, zl.Sugar()) + mustUpdateStatus(t, fc, pg.Namespace, pg.Name, func(p *tsapi.ProxyGroup) { + p.Status = pg.Status + }) + expectEqual(t, fc, pg, nil) + for i := range pgReplicas(pg) { + p := pod(pg, i) + mustCreate(t, fc, p) + mustUpdateStatus(t, fc, p.Namespace, p.Name, func(existing *corev1.Pod) { + existing.Status.PodIPs = p.Status.PodIPs + }) + } + expectReconciled(t, rec, "dev", "my-app") + setNotReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg)) + expectEqual(t, fc, egressSvc, nil) // still not ready + }) + t.Run("one_ready_replica", func(t *testing.T) { + setEndpointForReplica(pg, 0, eps) + mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) { + e.Endpoints = eps.Endpoints + }) + setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), 1) + expectReconciled(t, rec, "dev", "my-app") + expectEqual(t, fc, egressSvc, nil) // partially ready + }) + t.Run("all_replicas_ready", func(t *testing.T) { + for i := range pgReplicas(pg) { + setEndpointForReplica(pg, i, eps) + } + mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) { + e.Endpoints = eps.Endpoints + }) + setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), pgReplicas(pg)) + expectReconciled(t, rec, "dev", "my-app") + expectEqual(t, fc, egressSvc, nil) // ready + }) +} + +func setClusterNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger) { + tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonClusterResourcesNotReady, reasonClusterResourcesNotReady, cl, l) +} + +func setNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas int32) { + msg := fmt.Sprintf(msgReadyToRouteTemplate, 0, replicas) + tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonNotReady, msg, cl, l) +} + +func setReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas, readyReplicas int32) { + reason := reasonPartiallyReady + if readyReplicas == replicas { + reason = reasonReady + } + msg := fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas) + tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionTrue, reason, msg, cl, l) +} + +func setPGReady(pg *tsapi.ProxyGroup, cl tstime.Clock, l *zap.SugaredLogger) { + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, "foo", "foo", pg.Generation, cl, l) +} + +func setEndpointForReplica(pg *tsapi.ProxyGroup, ordinal int32, eps *discoveryv1.EndpointSlice) { + p := pod(pg, ordinal) + eps.Endpoints = append(eps.Endpoints, discoveryv1.Endpoint{ + Addresses: []string{p.Status.PodIPs[0].IP}, + Conditions: discoveryv1.EndpointConditions{ + Ready: pointer.ToBool(true), + Serving: pointer.ToBool(true), + Terminating: pointer.ToBool(false), + }, + }) +} + +func pod(pg *tsapi.ProxyGroup, ordinal int32) *corev1.Pod { + l := pgLabels(pg.Name, nil) + l[appsv1.PodIndexLabel] = fmt.Sprintf("%d", ordinal) + ip := fmt.Sprintf("10.0.0.%d", ordinal) + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d", pg.Name, ordinal), + Namespace: "operator-ns", + Labels: l, + }, + Status: corev1.PodStatus{ + PodIPs: []corev1.PodIP{{IP: ip}}, + }, + } +} diff --git a/cmd/k8s-operator/egress-services.go b/cmd/k8s-operator/egress-services.go index 20bafe8ec..98ed94366 100644 --- a/cmd/k8s-operator/egress-services.go +++ b/cmd/k8s-operator/egress-services.go @@ -161,7 +161,6 @@ func (esr *egressSvcsReconciler) Reconcile(ctx context.Context, req reconcile.Re } func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, l *zap.SugaredLogger) (err error) { - l.Debug("maybe provision") r := svcConfiguredReason(svc, false, l) st := metav1.ConditionFalse defer func() { @@ -272,11 +271,9 @@ func (esr *egressSvcsReconciler) provision(ctx context.Context, proxyGroupName s } } - crl := egressSvcChildResourceLabels(svc) + crl := egressSvcEpsLabels(svc, clusterIPSvc) // TODO(irbekrm): support IPv6, but need to investigate how kube proxy // sets up Service -> Pod routing when IPv6 is involved. - crl[discoveryv1.LabelServiceName] = clusterIPSvc.Name - crl[discoveryv1.LabelManagedBy] = "tailscale.com" eps := &discoveryv1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf("%s-ipv4", clusterIPSvc.Name), @@ -634,6 +631,19 @@ func egressSvcChildResourceLabels(svc *corev1.Service) map[string]string { } } +// egressEpsLabels returns labels to be added to an EndpointSlice created for an egress service. +func egressSvcEpsLabels(extNSvc, clusterIPSvc *corev1.Service) map[string]string { + l := egressSvcChildResourceLabels(extNSvc) + // Adding this label is what makes kube proxy set up rules to route traffic sent to the clusterIP Service to the + // endpoints defined on this EndpointSlice. + // https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#ownership + l[discoveryv1.LabelServiceName] = clusterIPSvc.Name + // Kubernetes recommends setting this label. + // https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#management + l[discoveryv1.LabelManagedBy] = "tailscale.com" + return l +} + func svcConfigurationUpToDate(svc *corev1.Service, l *zap.SugaredLogger) bool { cond := tsoperator.GetServiceCondition(svc, tsapi.EgressSvcConfigured) if cond == nil { diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index ff29618df..bd9c0f7bc 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -376,6 +376,22 @@ func runReconcilers(opts reconcilerOpts) { startlog.Fatalf("failed setting up indexer for egress Services: %v", err) } + egressSvcFromEpsFilter := handler.EnqueueRequestsFromMapFunc(egressSvcFromEps) + err = builder. + ControllerManagedBy(mgr). + Named("egress-svcs-readiness-reconciler"). + Watches(&corev1.Service{}, egressSvcFilter). + Watches(&discoveryv1.EndpointSlice{}, egressSvcFromEpsFilter). + Complete(&egressSvcsReadinessReconciler{ + Client: mgr.GetClient(), + tsNamespace: opts.tailscaleNamespace, + clock: tstime.DefaultClock{}, + logger: opts.log.Named("egress-svcs-readiness-reconciler"), + }) + if err != nil { + startlog.Fatalf("could not create egress Services readiness reconciler: %v", err) + } + epsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsHandler) podsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGPods(mgr.GetClient(), opts.tailscaleNamespace)) secretsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGStateSecrets(mgr.GetClient(), opts.tailscaleNamespace)) @@ -847,7 +863,7 @@ func egressEpsHandler(_ context.Context, o client.Object) []reconcile.Request { // returns reconciler requests for all egress EndpointSlices for that ProxyGroup. func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc { return func(_ context.Context, o client.Object) []reconcile.Request { - if _, ok := o.GetLabels()[LabelManaged]; !ok { + if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" { return nil } // TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we @@ -867,7 +883,7 @@ func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc { // returns reconciler requests for all egress EndpointSlices for that ProxyGroup. func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc { return func(_ context.Context, o client.Object) []reconcile.Request { - if _, ok := o.GetLabels()[LabelManaged]; !ok { + if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" { return nil } // TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we @@ -886,6 +902,33 @@ func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc { } } +// egressSvcFromEps is an event handler for EndpointSlices. If an EndpointSlice is for an egress ExternalName Service +// meant to be exposed on a ProxyGroup, returns a reconcile request for the Service. +func egressSvcFromEps(_ context.Context, o client.Object) []reconcile.Request { + if typ := o.GetLabels()[labelSvcType]; typ != typeEgress { + return nil + } + if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" { + return nil + } + svcName, ok := o.GetLabels()[LabelParentName] + if !ok { + return nil + } + svcNs, ok := o.GetLabels()[LabelParentNamespace] + if !ok { + return nil + } + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Namespace: svcNs, + Name: svcName, + }, + }, + } +} + func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.Request { epsList := discoveryv1.EndpointSliceList{} if err := cl.List(context.Background(), &epsList, diff --git a/k8s-operator/apis/v1alpha1/types_connector.go b/k8s-operator/apis/v1alpha1/types_connector.go index 07d05e1a5..358c2dd7a 100644 --- a/k8s-operator/apis/v1alpha1/types_connector.go +++ b/k8s-operator/apis/v1alpha1/types_connector.go @@ -172,14 +172,19 @@ type ConditionType string const ( ConnectorReady ConditionType = `ConnectorReady` ProxyClassReady ConditionType = `ProxyClassReady` - ProxyGroupReady ConditionType = `ProxyGroupReady` + ProxyGroupReady ConditionType = `TailscaleProxyGroupReady` ProxyReady ConditionType = `TailscaleProxyReady` // a Tailscale-specific condition type for corev1.Service RecorderReady ConditionType = `RecorderReady` - // EgressSvcValid is set to true if the user configured ExternalName Service for exposing a tailnet target on - // ProxyGroup nodes is valid. - EgressSvcValid ConditionType = `EgressSvcValid` - // EgressSvcConfigured is set to true if the configuration for the egress Service (proxy ConfigMap update, - // EndpointSlice for the Service) has been successfully applied. The Reason for this condition - // contains the name of the ProxyGroup and the hash of the Service ports and the tailnet target. - EgressSvcConfigured ConditionType = `EgressSvcConfigured` + // EgressSvcValid gets set on a user configured ExternalName Service that defines a tailnet target to be exposed + // on a ProxyGroup. + // Set to true if the user provided configuration is valid. + EgressSvcValid ConditionType = `TailscaleEgressSvcValid` + // EgressSvcConfigured gets set on a user configured ExternalName Service that defines a tailnet target to be exposed + // on a ProxyGroup. + // Set to true if the cluster resources for the service have been successfully configured. + EgressSvcConfigured ConditionType = `TailscaleEgressSvcConfigured` + // EgressSvcReady gets set on a user configured ExternalName Service that defines a tailnet target to be exposed + // on a ProxyGroup. + // Set to true if the service is ready to route cluster traffic. + EgressSvcReady ConditionType = `TailscaleEgressSvcReady` )