2023-08-23 16:35:12 +01:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2023-08-24 23:02:42 +01:00
//go:build !plan9
2023-08-23 16:35:12 +01:00
package main
import (
"context"
"fmt"
2023-08-28 20:56:44 +01:00
"net/netip"
2023-10-04 03:12:37 +01:00
"slices"
2023-08-23 16:35:12 +01:00
"strings"
2023-08-30 17:49:11 +01:00
"sync"
2023-08-23 16:35:12 +01:00
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
2023-08-30 17:49:11 +01:00
"k8s.io/apimachinery/pkg/types"
2023-10-17 18:05:02 +01:00
"k8s.io/client-go/tools/record"
2023-08-23 16:35:12 +01:00
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2023-08-30 17:49:11 +01:00
"tailscale.com/util/clientmetric"
"tailscale.com/util/set"
2023-08-23 16:35:12 +01:00
)
type ServiceReconciler struct {
client . Client
2023-08-17 01:35:36 +01:00
ssr * tailscaleSTSReconciler
logger * zap . SugaredLogger
isDefaultLoadBalancer bool
2023-08-30 17:49:11 +01:00
mu sync . Mutex // protects following
// managedIngressProxies is a set of all ingress proxies that we're
// currently managing. This is only used for metrics.
managedIngressProxies set . Slice [ types . UID ]
// managedEgressProxies is a set of all egress proxies that we're currently
// managing. This is only used for metrics.
managedEgressProxies set . Slice [ types . UID ]
2023-10-17 18:05:02 +01:00
recorder record . EventRecorder
2023-08-23 16:35:12 +01:00
}
2023-08-30 17:49:11 +01:00
var (
// gaugeEgressProxies tracks the number of egress proxies that we're
// currently managing.
gaugeEgressProxies = clientmetric . NewGauge ( "k8s_egress_proxies" )
// gaugeIngressProxies tracks the number of ingress proxies that we're
// currently managing.
gaugeIngressProxies = clientmetric . NewGauge ( "k8s_ingress_proxies" )
)
2023-08-24 20:18:17 +01:00
func childResourceLabels ( name , ns , typ string ) map [ string ] string {
2023-08-23 16:35:12 +01:00
// You might wonder why we're using owner references, since they seem to be
// built for exactly this. Unfortunately, Kubernetes does not support
// cross-namespace ownership, by design. This means we cannot make the
// service being exposed the owner of the implementation details of the
// proxying. Instead, we have to do our own filtering and tracking with
// labels.
return map [ string ] string {
LabelManaged : "true" ,
2023-08-24 20:18:17 +01:00
LabelParentName : name ,
LabelParentNamespace : ns ,
LabelParentType : typ ,
2023-08-23 16:35:12 +01:00
}
}
func ( a * ServiceReconciler ) Reconcile ( ctx context . Context , req reconcile . Request ) ( _ reconcile . Result , err error ) {
logger := a . logger . With ( "service-ns" , req . Namespace , "service-name" , req . Name )
logger . Debugf ( "starting reconcile" )
defer logger . Debugf ( "reconcile finished" )
svc := new ( corev1 . Service )
err = a . Get ( ctx , req . NamespacedName , svc )
if apierrors . IsNotFound ( err ) {
// Request object not found, could have been deleted after reconcile request.
logger . Debugf ( "service not found, assuming it was deleted" )
return reconcile . Result { } , nil
} else if err != nil {
return reconcile . Result { } , fmt . Errorf ( "failed to get svc: %w" , err )
}
2023-09-20 16:51:50 +01:00
targetIP := a . tailnetTargetAnnotation ( svc )
2023-11-24 16:24:48 +00:00
targetFQDN := svc . Annotations [ AnnotationTailnetTargetFQDN ]
if ! svc . DeletionTimestamp . IsZero ( ) || ! a . shouldExpose ( svc ) && targetIP == "" && targetFQDN == "" {
2023-08-30 08:31:37 +01:00
logger . Debugf ( "service is being deleted or is (no longer) referring to Tailscale ingress/egress, ensuring any created resources are cleaned up" )
2023-08-23 16:35:12 +01:00
return reconcile . Result { } , a . maybeCleanup ( ctx , logger , svc )
}
return reconcile . Result { } , a . maybeProvision ( ctx , logger , svc )
}
// maybeCleanup removes any existing resources related to serving svc over tailscale.
//
// This function is responsible for removing the finalizer from the service,
// once all associated resources are gone.
func ( a * ServiceReconciler ) maybeCleanup ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) error {
ix := slices . Index ( svc . Finalizers , FinalizerName )
if ix < 0 {
logger . Debugf ( "no finalizer, nothing to do" )
2023-08-30 17:49:11 +01:00
a . mu . Lock ( )
defer a . mu . Unlock ( )
a . managedIngressProxies . Remove ( svc . UID )
a . managedEgressProxies . Remove ( svc . UID )
gaugeIngressProxies . Set ( int64 ( a . managedIngressProxies . Len ( ) ) )
gaugeEgressProxies . Set ( int64 ( a . managedEgressProxies . Len ( ) ) )
2023-08-23 16:35:12 +01:00
return nil
}
2023-08-24 20:18:17 +01:00
if done , err := a . ssr . Cleanup ( ctx , logger , childResourceLabels ( svc . Name , svc . Namespace , "svc" ) ) ; err != nil {
2023-08-23 16:35:12 +01:00
return fmt . Errorf ( "failed to cleanup: %w" , err )
} else if ! done {
logger . Debugf ( "cleanup not done yet, waiting for next reconcile" )
return nil
}
svc . Finalizers = append ( svc . Finalizers [ : ix ] , svc . Finalizers [ ix + 1 : ] ... )
if err := a . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to remove finalizer: %w" , err )
}
// Unlike most log entries in the reconcile loop, this will get printed
// exactly once at the very end of cleanup, because the final step of
// cleanup removes the tailscale finalizer, which will make all future
// reconciles exit early.
logger . Infof ( "unexposed service from tailnet" )
2023-08-30 17:49:11 +01:00
a . mu . Lock ( )
defer a . mu . Unlock ( )
a . managedIngressProxies . Remove ( svc . UID )
a . managedEgressProxies . Remove ( svc . UID )
gaugeIngressProxies . Set ( int64 ( a . managedIngressProxies . Len ( ) ) )
gaugeEgressProxies . Set ( int64 ( a . managedEgressProxies . Len ( ) ) )
2023-08-23 16:35:12 +01:00
return nil
}
// maybeProvision ensures that svc is exposed over tailscale, taking any actions
// necessary to reach that state.
//
// This function adds a finalizer to svc, ensuring that we can handle orderly
// deprovisioning later.
func ( a * ServiceReconciler ) maybeProvision ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) error {
2023-11-24 16:24:48 +00:00
// Run for proxy config related validations here as opposed to running
// them earlier. This is to prevent cleanup being blocked on a
// misconfigured proxy param.
2023-10-17 18:05:02 +01:00
if err := a . ssr . validate ( ) ; err != nil {
msg := fmt . Sprintf ( "unable to provision proxy resources: invalid config: %v" , err )
a . recorder . Event ( svc , corev1 . EventTypeWarning , "INVALIDCONFIG" , msg )
a . logger . Error ( msg )
return nil
}
2023-11-24 16:24:48 +00:00
if violations := validateService ( svc ) ; len ( violations ) > 0 {
msg := fmt . Sprintf ( "unable to provision proxy resources: invalid Service: %s" , strings . Join ( violations , ", " ) )
a . recorder . Event ( svc , corev1 . EventTypeWarning , "INVALIDSERVCICE" , msg )
a . logger . Error ( msg )
return nil
}
2023-08-23 16:35:12 +01:00
hostname , err := nameForService ( svc )
if err != nil {
return err
}
if ! slices . Contains ( svc . Finalizers , FinalizerName ) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
// this is a nice place to tell the operator that the high level,
// multi-reconcile operation is underway.
logger . Infof ( "exposing service over tailscale" )
svc . Finalizers = append ( svc . Finalizers , FinalizerName )
if err := a . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to add finalizer: %w" , err )
}
}
2023-08-24 20:18:17 +01:00
crl := childResourceLabels ( svc . Name , svc . Namespace , "svc" )
2023-08-23 16:35:12 +01:00
var tags [ ] string
if tstr , ok := svc . Annotations [ AnnotationTags ] ; ok {
tags = strings . Split ( tstr , "," )
}
sts := & tailscaleSTSConfig {
ParentResourceName : svc . Name ,
ParentResourceUID : string ( svc . UID ) ,
Hostname : hostname ,
Tags : tags ,
ChildResourceLabels : crl ,
2023-08-30 14:22:06 +01:00
}
2023-08-30 17:49:11 +01:00
a . mu . Lock ( )
2023-08-30 14:22:06 +01:00
if a . shouldExpose ( svc ) {
sts . ClusterTargetIP = svc . Spec . ClusterIP
2023-08-30 17:49:11 +01:00
a . managedIngressProxies . Add ( svc . UID )
gaugeIngressProxies . Set ( int64 ( a . managedIngressProxies . Len ( ) ) )
2023-09-20 16:51:50 +01:00
} else if ip := a . tailnetTargetAnnotation ( svc ) ; ip != "" {
sts . TailnetTargetIP = ip
2023-08-30 17:49:11 +01:00
a . managedEgressProxies . Add ( svc . UID )
gaugeEgressProxies . Set ( int64 ( a . managedEgressProxies . Len ( ) ) )
2023-11-24 16:24:48 +00:00
} else if fqdn := svc . Annotations [ AnnotationTailnetTargetFQDN ] ; fqdn != "" {
fqdn := svc . Annotations [ AnnotationTailnetTargetFQDN ]
if ! strings . HasSuffix ( fqdn , "." ) {
fqdn = fqdn + "."
}
sts . TailnetTargetFQDN = fqdn
a . managedEgressProxies . Add ( svc . UID )
gaugeEgressProxies . Set ( int64 ( a . managedEgressProxies . Len ( ) ) )
2023-08-23 16:35:12 +01:00
}
2023-08-30 17:49:11 +01:00
a . mu . Unlock ( )
2023-08-23 16:35:12 +01:00
2023-08-30 08:31:37 +01:00
var hsvc * corev1 . Service
if hsvc , err = a . ssr . Provision ( ctx , logger , sts ) ; err != nil {
2023-08-23 16:35:12 +01:00
return fmt . Errorf ( "failed to provision: %w" , err )
}
2023-11-24 16:24:48 +00:00
if sts . TailnetTargetIP != "" || sts . TailnetTargetFQDN != "" {
2023-09-20 20:58:28 +01:00
// TODO (irbekrm): cluster.local is the default DNS name, but
// can be changed by users. Make this configurable or figure out
// how to discover the DNS name from within operator
headlessSvcName := hsvc . Name + "." + hsvc . Namespace + ".svc.cluster.local"
2023-08-30 08:31:37 +01:00
if svc . Spec . ExternalName != headlessSvcName || svc . Spec . Type != corev1 . ServiceTypeExternalName {
svc . Spec . ExternalName = headlessSvcName
svc . Spec . Selector = nil
svc . Spec . Type = corev1 . ServiceTypeExternalName
if err := a . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to update service: %w" , err )
}
}
return nil
}
2023-08-23 16:35:12 +01:00
if ! a . hasLoadBalancerClass ( svc ) {
logger . Debugf ( "service is not a LoadBalancer, so not updating ingress" )
return nil
}
2023-08-09 00:03:08 +01:00
_ , tsHost , tsIPs , err := a . ssr . DeviceInfo ( ctx , crl )
2023-08-23 16:35:12 +01:00
if err != nil {
return fmt . Errorf ( "failed to get device ID: %w" , err )
}
if tsHost == "" {
logger . Debugf ( "no Tailscale hostname known yet, waiting for proxy pod to finish auth" )
// No hostname yet. Wait for the proxy pod to auth.
svc . Status . LoadBalancer . Ingress = nil
if err := a . Status ( ) . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to update service status: %w" , err )
}
return nil
}
2023-08-09 00:03:08 +01:00
logger . Debugf ( "setting ingress to %q, %s" , tsHost , strings . Join ( tsIPs , ", " ) )
ingress := [ ] corev1 . LoadBalancerIngress {
{ Hostname : tsHost } ,
2023-08-23 16:35:12 +01:00
}
2023-08-30 08:31:37 +01:00
clusterIPAddr , err := netip . ParseAddr ( svc . Spec . ClusterIP )
if err != nil {
return fmt . Errorf ( "failed to parse cluster IP: %w" , err )
}
2023-08-09 00:03:08 +01:00
for _ , ip := range tsIPs {
2023-08-28 20:56:44 +01:00
addr , err := netip . ParseAddr ( ip )
if err != nil {
continue
}
if addr . Is4 ( ) == clusterIPAddr . Is4 ( ) { // only add addresses of the same family
ingress = append ( ingress , corev1 . LoadBalancerIngress { IP : ip } )
}
2023-08-09 00:03:08 +01:00
}
svc . Status . LoadBalancer . Ingress = ingress
2023-08-23 16:35:12 +01:00
if err := a . Status ( ) . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to update service status: %w" , err )
}
return nil
}
2023-11-24 16:24:48 +00:00
func validateService ( svc * corev1 . Service ) [ ] string {
violations := make ( [ ] string , 0 )
if svc . Annotations [ AnnotationTailnetTargetFQDN ] != "" && svc . Annotations [ AnnotationTailnetTargetIP ] != "" {
violations = append ( violations , "only one of annotations %s and %s can be set" , AnnotationTailnetTargetIP , AnnotationTailnetTargetFQDN )
}
if fqdn := svc . Annotations [ AnnotationTailnetTargetFQDN ] ; fqdn != "" {
if ! isMagicDNSName ( fqdn ) {
violations = append ( violations , fmt . Sprintf ( "invalid value of annotation %s: %q does not appear to be a valid MagicDNS name" , AnnotationTailnetTargetFQDN , fqdn ) )
}
}
return violations
}
2023-08-23 16:35:12 +01:00
func ( a * ServiceReconciler ) shouldExpose ( svc * corev1 . Service ) bool {
// Headless services can't be exposed, since there is no ClusterIP to
// forward to.
if svc . Spec . ClusterIP == "" || svc . Spec . ClusterIP == "None" {
return false
}
2023-08-30 08:31:37 +01:00
return a . hasLoadBalancerClass ( svc ) || a . hasExposeAnnotation ( svc )
2023-08-23 16:35:12 +01:00
}
func ( a * ServiceReconciler ) hasLoadBalancerClass ( svc * corev1 . Service ) bool {
return svc != nil &&
svc . Spec . Type == corev1 . ServiceTypeLoadBalancer &&
2023-08-17 01:35:36 +01:00
( svc . Spec . LoadBalancerClass != nil && * svc . Spec . LoadBalancerClass == "tailscale" ||
svc . Spec . LoadBalancerClass == nil && a . isDefaultLoadBalancer )
2023-08-23 16:35:12 +01:00
}
2023-08-30 08:31:37 +01:00
// hasExposeAnnotation reports whether Service has the tailscale.com/expose
// annotation set
func ( a * ServiceReconciler ) hasExposeAnnotation ( svc * corev1 . Service ) bool {
return svc != nil && svc . Annotations [ AnnotationExpose ] == "true"
}
2023-09-20 16:51:50 +01:00
// hasTailnetTargetAnnotation returns the value of tailscale.com/tailnet-ip
// annotation or of the deprecated tailscale.com/ts-tailnet-target-ip
// annotation. If neither is set, it returns an empty string. If both are set,
// it returns the value of the new annotation.
func ( a * ServiceReconciler ) tailnetTargetAnnotation ( svc * corev1 . Service ) string {
if svc == nil {
return ""
}
if ip := svc . Annotations [ AnnotationTailnetTargetIP ] ; ip != "" {
return ip
}
return svc . Annotations [ annotationTailnetTargetIPOld ]
2023-08-23 16:35:12 +01:00
}