cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services for ProxyGroup (#13746)

cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services

Set a readiness condition on ExternalName Services that define a tailnet target
to route cluster traffic to via a ProxyGroup's proxies. The condition
is set to true if at least one proxy is currently set up to route.

Updates tailscale/tailscale#13406

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2024-10-09 18:23:40 +01:00 committed by GitHub
parent 94c79659fa
commit 89ee6bbdae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 420 additions and 14 deletions

View File

@ -0,0 +1,179 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"errors"
"fmt"
"strings"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
discoveryv1 "k8s.io/api/discovery/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tstime"
)
const (
reasonReadinessCheckFailed = "ReadinessCheckFailed"
reasonClusterResourcesNotReady = "ClusterResourcesNotReady"
reasonNoProxies = "NoProxiesConfigured"
reasonNotReady = "NotReadyToRouteTraffic"
reasonReady = "ReadyToRouteTraffic"
reasonPartiallyReady = "PartiallyReadyToRouteTraffic"
msgReadyToRouteTemplate = "%d out of %d replicas are ready to route traffic"
)
type egressSvcsReadinessReconciler struct {
client.Client
logger *zap.SugaredLogger
clock tstime.Clock
tsNamespace string
}
// Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the
// EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to
// route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress
// service to determine how many replicas are currently able to route traffic.
func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
l := esrr.logger.With("Service", req.NamespacedName)
defer l.Info("reconcile finished")
svc := new(corev1.Service)
if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
l.Info("Service not found")
return res, nil
} else if err != nil {
return res, fmt.Errorf("failed to get Service: %w", err)
}
var (
reason, msg string
st metav1.ConditionStatus = metav1.ConditionUnknown
)
oldStatus := svc.Status.DeepCopy()
defer func() {
tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l)
if !apiequality.Semantic.DeepEqual(oldStatus, svc.Status) {
err = errors.Join(err, esrr.Status().Update(ctx, svc))
}
}()
crl := egressSvcChildResourceLabels(svc)
eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl)
if err != nil {
err = fmt.Errorf("error getting EndpointSlice: %w", err)
reason = reasonReadinessCheckFailed
msg = err.Error()
return res, err
}
if eps == nil {
l.Infof("EndpointSlice for Service does not yet exist, waiting...")
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
st = metav1.ConditionFalse
return res, nil
}
pg := &tsapi.ProxyGroup{
ObjectMeta: metav1.ObjectMeta{
Name: svc.Annotations[AnnotationProxyGroup],
},
}
err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg)
if apierrors.IsNotFound(err) {
l.Infof("ProxyGroup for Service does not exist, waiting...")
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
st = metav1.ConditionFalse
return res, nil
}
if err != nil {
err = fmt.Errorf("error retrieving ProxyGroup: %w", err)
reason = reasonReadinessCheckFailed
msg = err.Error()
return res, err
}
if !tsoperator.ProxyGroupIsReady(pg) {
l.Infof("ProxyGroup for Service is not ready, waiting...")
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
st = metav1.ConditionFalse
return res, nil
}
replicas := pgReplicas(pg)
if replicas == 0 {
l.Infof("ProxyGroup replicas set to 0")
reason, msg = reasonNoProxies, reasonNoProxies
st = metav1.ConditionFalse
return res, nil
}
podLabels := pgLabels(pg.Name, nil)
var readyReplicas int32
for i := range replicas {
podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i)
pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels)
if err != nil {
err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err)
reason = reasonReadinessCheckFailed
msg = err.Error()
return res, err
}
if pod == nil {
l.Infof("[unexpected] ProxyGroup is ready, but replica %d was not found", i)
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
return res, nil
}
l.Infof("looking at Pod with IPs %v", pod.Status.PodIPs)
ready := false
for _, ep := range eps.Endpoints {
l.Infof("looking at endpoint with addresses %v", ep.Addresses)
if endpointReadyForPod(&ep, pod, l) {
l.Infof("endpoint is ready for Pod")
ready = true
break
}
}
if ready {
readyReplicas++
}
}
msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
if readyReplicas == 0 {
reason = reasonNotReady
st = metav1.ConditionFalse
return res, nil
}
st = metav1.ConditionTrue
if readyReplicas < replicas {
reason = reasonPartiallyReady
} else {
reason = reasonReady
}
return res, nil
}
// endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic.
// Endpoint must not be nil.
func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool {
podIP, err := podIPv4(pod)
if err != nil {
l.Infof("[unexpected] error retrieving Pod's IPv4 address: %v", err)
return false
}
// Currently we only ever set a single address on and Endpoint and nothing else is meant to modify this.
if len(ep.Addresses) != 1 {
return false
}
return strings.EqualFold(ep.Addresses[0], podIP) &&
*ep.Conditions.Ready &&
*ep.Conditions.Serving &&
!*ep.Conditions.Terminating
}

View File

@ -0,0 +1,169 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"fmt"
"testing"
"github.com/AlekSi/pointer"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
discoveryv1 "k8s.io/api/discovery/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tstest"
"tailscale.com/tstime"
)
func TestEgressServiceReadiness(t *testing.T) {
// We need to pass a ProxyGroup object to WithStatusSubresource because of some quirks in how the fake client
// works. Without this code further down would not be able to update ProxyGroup status.
fc := fake.NewClientBuilder().
WithScheme(tsapi.GlobalScheme).
WithStatusSubresource(&tsapi.ProxyGroup{}).
Build()
zl, _ := zap.NewDevelopment()
cl := tstest.NewClock(tstest.ClockOpts{})
rec := &egressSvcsReadinessReconciler{
tsNamespace: "operator-ns",
Client: fc,
logger: zl.Sugar(),
clock: cl,
}
tailnetFQDN := "my-app.tailnetxyz.ts.net"
egressSvc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "my-app",
Namespace: "dev",
Annotations: map[string]string{
AnnotationProxyGroup: "dev",
AnnotationTailnetTargetFQDN: tailnetFQDN,
},
},
}
fakeClusterIPSvc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "my-app", Namespace: "operator-ns"}}
l := egressSvcEpsLabels(egressSvc, fakeClusterIPSvc)
eps := &discoveryv1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "my-app",
Namespace: "operator-ns",
Labels: l,
},
AddressType: discoveryv1.AddressTypeIPv4,
}
pg := &tsapi.ProxyGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "dev",
},
}
mustCreate(t, fc, egressSvc)
setClusterNotReady(egressSvc, cl, zl.Sugar())
t.Run("endpointslice_does_not_exist", func(t *testing.T) {
expectReconciled(t, rec, "dev", "my-app")
expectEqual(t, fc, egressSvc, nil) // not ready
})
t.Run("proxy_group_does_not_exist", func(t *testing.T) {
mustCreate(t, fc, eps)
expectReconciled(t, rec, "dev", "my-app")
expectEqual(t, fc, egressSvc, nil) // still not ready
})
t.Run("proxy_group_not_ready", func(t *testing.T) {
mustCreate(t, fc, pg)
expectReconciled(t, rec, "dev", "my-app")
expectEqual(t, fc, egressSvc, nil) // still not ready
})
t.Run("no_ready_replicas", func(t *testing.T) {
setPGReady(pg, cl, zl.Sugar())
mustUpdateStatus(t, fc, pg.Namespace, pg.Name, func(p *tsapi.ProxyGroup) {
p.Status = pg.Status
})
expectEqual(t, fc, pg, nil)
for i := range pgReplicas(pg) {
p := pod(pg, i)
mustCreate(t, fc, p)
mustUpdateStatus(t, fc, p.Namespace, p.Name, func(existing *corev1.Pod) {
existing.Status.PodIPs = p.Status.PodIPs
})
}
expectReconciled(t, rec, "dev", "my-app")
setNotReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg))
expectEqual(t, fc, egressSvc, nil) // still not ready
})
t.Run("one_ready_replica", func(t *testing.T) {
setEndpointForReplica(pg, 0, eps)
mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
e.Endpoints = eps.Endpoints
})
setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), 1)
expectReconciled(t, rec, "dev", "my-app")
expectEqual(t, fc, egressSvc, nil) // partially ready
})
t.Run("all_replicas_ready", func(t *testing.T) {
for i := range pgReplicas(pg) {
setEndpointForReplica(pg, i, eps)
}
mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
e.Endpoints = eps.Endpoints
})
setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), pgReplicas(pg))
expectReconciled(t, rec, "dev", "my-app")
expectEqual(t, fc, egressSvc, nil) // ready
})
}
func setClusterNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger) {
tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonClusterResourcesNotReady, reasonClusterResourcesNotReady, cl, l)
}
func setNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas int32) {
msg := fmt.Sprintf(msgReadyToRouteTemplate, 0, replicas)
tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonNotReady, msg, cl, l)
}
func setReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas, readyReplicas int32) {
reason := reasonPartiallyReady
if readyReplicas == replicas {
reason = reasonReady
}
msg := fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionTrue, reason, msg, cl, l)
}
func setPGReady(pg *tsapi.ProxyGroup, cl tstime.Clock, l *zap.SugaredLogger) {
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, "foo", "foo", pg.Generation, cl, l)
}
func setEndpointForReplica(pg *tsapi.ProxyGroup, ordinal int32, eps *discoveryv1.EndpointSlice) {
p := pod(pg, ordinal)
eps.Endpoints = append(eps.Endpoints, discoveryv1.Endpoint{
Addresses: []string{p.Status.PodIPs[0].IP},
Conditions: discoveryv1.EndpointConditions{
Ready: pointer.ToBool(true),
Serving: pointer.ToBool(true),
Terminating: pointer.ToBool(false),
},
})
}
func pod(pg *tsapi.ProxyGroup, ordinal int32) *corev1.Pod {
l := pgLabels(pg.Name, nil)
l[appsv1.PodIndexLabel] = fmt.Sprintf("%d", ordinal)
ip := fmt.Sprintf("10.0.0.%d", ordinal)
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%d", pg.Name, ordinal),
Namespace: "operator-ns",
Labels: l,
},
Status: corev1.PodStatus{
PodIPs: []corev1.PodIP{{IP: ip}},
},
}
}

View File

@ -161,7 +161,6 @@ func (esr *egressSvcsReconciler) Reconcile(ctx context.Context, req reconcile.Re
}
func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, l *zap.SugaredLogger) (err error) {
l.Debug("maybe provision")
r := svcConfiguredReason(svc, false, l)
st := metav1.ConditionFalse
defer func() {
@ -272,11 +271,9 @@ func (esr *egressSvcsReconciler) provision(ctx context.Context, proxyGroupName s
}
}
crl := egressSvcChildResourceLabels(svc)
crl := egressSvcEpsLabels(svc, clusterIPSvc)
// TODO(irbekrm): support IPv6, but need to investigate how kube proxy
// sets up Service -> Pod routing when IPv6 is involved.
crl[discoveryv1.LabelServiceName] = clusterIPSvc.Name
crl[discoveryv1.LabelManagedBy] = "tailscale.com"
eps := &discoveryv1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-ipv4", clusterIPSvc.Name),
@ -634,6 +631,19 @@ func egressSvcChildResourceLabels(svc *corev1.Service) map[string]string {
}
}
// egressEpsLabels returns labels to be added to an EndpointSlice created for an egress service.
func egressSvcEpsLabels(extNSvc, clusterIPSvc *corev1.Service) map[string]string {
l := egressSvcChildResourceLabels(extNSvc)
// Adding this label is what makes kube proxy set up rules to route traffic sent to the clusterIP Service to the
// endpoints defined on this EndpointSlice.
// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#ownership
l[discoveryv1.LabelServiceName] = clusterIPSvc.Name
// Kubernetes recommends setting this label.
// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#management
l[discoveryv1.LabelManagedBy] = "tailscale.com"
return l
}
func svcConfigurationUpToDate(svc *corev1.Service, l *zap.SugaredLogger) bool {
cond := tsoperator.GetServiceCondition(svc, tsapi.EgressSvcConfigured)
if cond == nil {

View File

@ -376,6 +376,22 @@ func runReconcilers(opts reconcilerOpts) {
startlog.Fatalf("failed setting up indexer for egress Services: %v", err)
}
egressSvcFromEpsFilter := handler.EnqueueRequestsFromMapFunc(egressSvcFromEps)
err = builder.
ControllerManagedBy(mgr).
Named("egress-svcs-readiness-reconciler").
Watches(&corev1.Service{}, egressSvcFilter).
Watches(&discoveryv1.EndpointSlice{}, egressSvcFromEpsFilter).
Complete(&egressSvcsReadinessReconciler{
Client: mgr.GetClient(),
tsNamespace: opts.tailscaleNamespace,
clock: tstime.DefaultClock{},
logger: opts.log.Named("egress-svcs-readiness-reconciler"),
})
if err != nil {
startlog.Fatalf("could not create egress Services readiness reconciler: %v", err)
}
epsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsHandler)
podsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGPods(mgr.GetClient(), opts.tailscaleNamespace))
secretsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGStateSecrets(mgr.GetClient(), opts.tailscaleNamespace))
@ -847,7 +863,7 @@ func egressEpsHandler(_ context.Context, o client.Object) []reconcile.Request {
// returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
return func(_ context.Context, o client.Object) []reconcile.Request {
if _, ok := o.GetLabels()[LabelManaged]; !ok {
if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
return nil
}
// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
@ -867,7 +883,7 @@ func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
// returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
return func(_ context.Context, o client.Object) []reconcile.Request {
if _, ok := o.GetLabels()[LabelManaged]; !ok {
if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
return nil
}
// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
@ -886,6 +902,33 @@ func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
}
}
// egressSvcFromEps is an event handler for EndpointSlices. If an EndpointSlice is for an egress ExternalName Service
// meant to be exposed on a ProxyGroup, returns a reconcile request for the Service.
func egressSvcFromEps(_ context.Context, o client.Object) []reconcile.Request {
if typ := o.GetLabels()[labelSvcType]; typ != typeEgress {
return nil
}
if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
return nil
}
svcName, ok := o.GetLabels()[LabelParentName]
if !ok {
return nil
}
svcNs, ok := o.GetLabels()[LabelParentNamespace]
if !ok {
return nil
}
return []reconcile.Request{
{
NamespacedName: types.NamespacedName{
Namespace: svcNs,
Name: svcName,
},
},
}
}
func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.Request {
epsList := discoveryv1.EndpointSliceList{}
if err := cl.List(context.Background(), &epsList,

View File

@ -172,14 +172,19 @@ type ConditionType string
const (
ConnectorReady ConditionType = `ConnectorReady`
ProxyClassReady ConditionType = `ProxyClassReady`
ProxyGroupReady ConditionType = `ProxyGroupReady`
ProxyGroupReady ConditionType = `TailscaleProxyGroupReady`
ProxyReady ConditionType = `TailscaleProxyReady` // a Tailscale-specific condition type for corev1.Service
RecorderReady ConditionType = `RecorderReady`
// EgressSvcValid is set to true if the user configured ExternalName Service for exposing a tailnet target on
// ProxyGroup nodes is valid.
EgressSvcValid ConditionType = `EgressSvcValid`
// EgressSvcConfigured is set to true if the configuration for the egress Service (proxy ConfigMap update,
// EndpointSlice for the Service) has been successfully applied. The Reason for this condition
// contains the name of the ProxyGroup and the hash of the Service ports and the tailnet target.
EgressSvcConfigured ConditionType = `EgressSvcConfigured`
// EgressSvcValid gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
// on a ProxyGroup.
// Set to true if the user provided configuration is valid.
EgressSvcValid ConditionType = `TailscaleEgressSvcValid`
// EgressSvcConfigured gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
// on a ProxyGroup.
// Set to true if the cluster resources for the service have been successfully configured.
EgressSvcConfigured ConditionType = `TailscaleEgressSvcConfigured`
// EgressSvcReady gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
// on a ProxyGroup.
// Set to true if the service is ready to route cluster traffic.
EgressSvcReady ConditionType = `TailscaleEgressSvcReady`
)