2023-01-27 21:37:20 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2022-12-12 19:15:34 +00:00
// tailscale-operator provides a way to expose services running in a Kubernetes
// cluster to your Tailnet.
package main
import (
"context"
2023-03-23 18:37:26 +00:00
"crypto/tls"
2022-12-12 19:15:34 +00:00
_ "embed"
"fmt"
2023-03-23 18:37:26 +00:00
"net/http"
2022-12-12 19:15:34 +00:00
"os"
"strings"
"time"
2022-12-13 23:37:35 +00:00
"github.com/go-logr/zapr"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
2022-12-12 19:15:34 +00:00
"golang.org/x/exp/slices"
2022-12-14 20:21:16 +00:00
"golang.org/x/oauth2/clientcredentials"
2022-12-12 19:15:34 +00:00
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
2023-03-23 18:37:26 +00:00
"k8s.io/client-go/transport"
2022-12-12 19:15:34 +00:00
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
2022-12-13 23:37:35 +00:00
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
2022-12-12 19:15:34 +00:00
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/yaml"
"tailscale.com/client/tailscale"
2023-02-23 02:26:17 +00:00
"tailscale.com/hostinfo"
2022-12-14 20:21:16 +00:00
"tailscale.com/ipn"
2022-12-12 19:15:34 +00:00
"tailscale.com/ipn/store/kubestore"
"tailscale.com/tsnet"
"tailscale.com/types/logger"
2023-02-23 02:26:17 +00:00
"tailscale.com/types/opt"
2023-01-25 18:16:59 +00:00
"tailscale.com/util/dnsname"
2023-04-07 00:01:35 +01:00
"tailscale.com/version"
2022-12-12 19:15:34 +00:00
)
func main ( ) {
2022-12-14 20:21:16 +00:00
// Required to use our client API. We're fine with the instability since the
// client lives in the same repo as this code.
tailscale . I_Acknowledge_This_API_Is_Unstable = true
2022-12-13 05:00:10 +00:00
var (
2023-02-03 22:47:52 +00:00
hostname = defaultEnv ( "OPERATOR_HOSTNAME" , "tailscale-operator" )
kubeSecret = defaultEnv ( "OPERATOR_SECRET" , "" )
operatorTags = defaultEnv ( "OPERATOR_INITIAL_TAGS" , "tag:k8s-operator" )
tsNamespace = defaultEnv ( "OPERATOR_NAMESPACE" , "" )
tslogging = defaultEnv ( "OPERATOR_LOGGING" , "info" )
clientIDPath = defaultEnv ( "CLIENT_ID_FILE" , "" )
clientSecretPath = defaultEnv ( "CLIENT_SECRET_FILE" , "" )
image = defaultEnv ( "PROXY_IMAGE" , "tailscale/tailscale:latest" )
2023-05-17 11:54:27 +01:00
priorityClassName = defaultEnv ( "PROXY_PRIORITY_CLASS_NAME" , "" )
2023-02-03 22:47:52 +00:00
tags = defaultEnv ( "PROXY_TAGS" , "tag:k8s" )
2023-02-23 02:26:17 +00:00
shouldRunAuthProxy = defaultBool ( "AUTH_PROXY" , false )
2022-12-13 05:00:10 +00:00
)
2022-12-13 23:37:35 +00:00
var opts [ ] kzap . Opts
switch tslogging {
case "info" :
opts = append ( opts , kzap . Level ( zapcore . InfoLevel ) )
case "debug" :
opts = append ( opts , kzap . Level ( zapcore . DebugLevel ) )
case "dev" :
opts = append ( opts , kzap . UseDevMode ( true ) , kzap . Level ( zapcore . DebugLevel ) )
}
zlog := kzap . NewRaw ( opts ... ) . Sugar ( )
logf . SetLogger ( zapr . NewLogger ( zlog . Desugar ( ) ) )
startlog := zlog . Named ( "startup" )
2022-12-14 20:21:16 +00:00
if clientIDPath == "" || clientSecretPath == "" {
startlog . Fatalf ( "CLIENT_ID_FILE and CLIENT_SECRET_FILE must be set" )
}
clientID , err := os . ReadFile ( clientIDPath )
if err != nil {
startlog . Fatalf ( "reading client ID %q: %v" , clientIDPath , err )
}
clientSecret , err := os . ReadFile ( clientSecretPath )
if err != nil {
startlog . Fatalf ( "reading client secret %q: %v" , clientSecretPath , err )
}
credentials := clientcredentials . Config {
ClientID : string ( clientID ) ,
ClientSecret : string ( clientSecret ) ,
TokenURL : "https://login.tailscale.com/api/v2/oauth/token" ,
}
tsClient := tailscale . NewClient ( "-" , nil )
tsClient . HTTPClient = credentials . Client ( context . Background ( ) )
2023-02-23 02:26:17 +00:00
if shouldRunAuthProxy {
2023-02-27 17:58:54 +00:00
hostinfo . SetApp ( "k8s-operator-proxy" )
2023-02-23 02:26:17 +00:00
} else {
2023-02-27 17:58:54 +00:00
hostinfo . SetApp ( "k8s-operator" )
2023-02-23 02:26:17 +00:00
}
2022-12-12 19:15:34 +00:00
s := & tsnet . Server {
Hostname : hostname ,
2022-12-13 23:37:35 +00:00
Logf : zlog . Named ( "tailscaled" ) . Debugf ,
2022-12-12 19:15:34 +00:00
}
if kubeSecret != "" {
st , err := kubestore . New ( logger . Discard , kubeSecret )
if err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "creating kube store: %v" , err )
2022-12-12 19:15:34 +00:00
}
s . Store = st
}
if err := s . Start ( ) ; err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "starting tailscale server: %v" , err )
2022-12-12 19:15:34 +00:00
}
defer s . Close ( )
lc , err := s . LocalClient ( )
if err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "getting local client: %v" , err )
2022-12-12 19:15:34 +00:00
}
ctx := context . Background ( )
2022-12-14 20:21:16 +00:00
loginDone := false
2022-12-12 19:15:34 +00:00
machineAuthShown := false
waitOnline :
for {
2022-12-14 20:21:16 +00:00
startlog . Debugf ( "querying tailscaled status" )
2022-12-12 19:15:34 +00:00
st , err := lc . StatusWithoutPeers ( ctx )
if err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "getting status: %v" , err )
2022-12-12 19:15:34 +00:00
}
switch st . BackendState {
case "Running" :
break waitOnline
case "NeedsLogin" :
2022-12-14 20:21:16 +00:00
if loginDone {
break
}
caps := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Preauthorized : true ,
Tags : strings . Split ( operatorTags , "," ) ,
} ,
} ,
}
2023-05-13 02:50:30 +01:00
authkey , _ , err := tsClient . CreateKey ( ctx , caps )
2022-12-14 20:21:16 +00:00
if err != nil {
startlog . Fatalf ( "creating operator authkey: %v" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-14 20:21:16 +00:00
if err := lc . Start ( ctx , ipn . Options {
AuthKey : authkey ,
} ) ; err != nil {
startlog . Fatalf ( "starting tailscale: %v" , err )
}
if err := lc . StartLoginInteractive ( ctx ) ; err != nil {
startlog . Fatalf ( "starting login: %v" , err )
}
startlog . Debugf ( "requested login by authkey" )
loginDone = true
2022-12-12 19:15:34 +00:00
case "NeedsMachineAuth" :
if ! machineAuthShown {
2023-03-01 19:16:42 +00:00
startlog . Infof ( "Machine approval required, please visit the admin panel to approve" )
2022-12-12 19:15:34 +00:00
machineAuthShown = true
}
default :
2022-12-13 23:37:35 +00:00
startlog . Debugf ( "waiting for tailscale to start: %v" , st . BackendState )
2022-12-12 19:15:34 +00:00
}
time . Sleep ( time . Second )
}
// For secrets and statefulsets, we only get permission to touch the objects
// in the controller's own namespace. This cannot be expressed by
// .Watches(...) below, instead you have to add a per-type field selector to
// the cache that sits a few layers below the builder stuff, which will
// implicitly filter what parts of the world the builder code gets to see at
// all.
2023-05-19 17:44:12 +01:00
nsFilter := cache . ByObject {
Field : client . InNamespace ( tsNamespace ) . AsSelector ( ) ,
2022-12-12 19:15:34 +00:00
}
2023-02-03 22:47:52 +00:00
restConfig := config . GetConfigOrDie ( )
mgr , err := manager . New ( restConfig , manager . Options {
2023-05-19 17:44:12 +01:00
Cache : cache . Options {
ByObject : map [ client . Object ] cache . ByObject {
2022-12-12 19:15:34 +00:00
& corev1 . Secret { } : nsFilter ,
& appsv1 . StatefulSet { } : nsFilter ,
} ,
2023-05-19 17:44:12 +01:00
} ,
2022-12-12 19:15:34 +00:00
} )
if err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "could not create manager: %v" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-14 20:21:16 +00:00
2023-01-19 20:42:09 +00:00
sr := & ServiceReconciler {
2023-05-17 11:54:27 +01:00
Client : mgr . GetClient ( ) ,
tsClient : tsClient ,
defaultTags : strings . Split ( tags , "," ) ,
operatorNamespace : tsNamespace ,
proxyImage : image ,
proxyPriorityClassName : priorityClassName ,
logger : zlog . Named ( "service-reconciler" ) ,
2023-01-19 20:42:09 +00:00
}
2023-05-19 17:44:12 +01:00
reconcileFilter := handler . EnqueueRequestsFromMapFunc ( func ( _ context . Context , o client . Object ) [ ] reconcile . Request {
2022-12-12 19:15:34 +00:00
ls := o . GetLabels ( )
if ls [ LabelManaged ] != "true" {
return nil
}
if ls [ LabelParentType ] != "svc" {
return nil
}
return [ ] reconcile . Request {
{
NamespacedName : types . NamespacedName {
Namespace : ls [ LabelParentNamespace ] ,
Name : ls [ LabelParentName ] ,
} ,
} ,
}
} )
err = builder .
ControllerManagedBy ( mgr ) .
For ( & corev1 . Service { } ) .
2023-05-19 17:44:12 +01:00
Watches ( & appsv1 . StatefulSet { } , reconcileFilter ) .
Watches ( & corev1 . Secret { } , reconcileFilter ) .
2022-12-12 19:15:34 +00:00
Complete ( sr )
if err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "could not create controller: %v" , err )
2022-12-12 19:15:34 +00:00
}
2023-04-07 00:01:35 +01:00
startlog . Infof ( "Startup complete, operator running, version: %s" , version . Long ( ) )
2023-02-23 02:26:17 +00:00
if shouldRunAuthProxy {
2023-03-23 18:37:26 +00:00
cfg , err := restConfig . TransportConfig ( )
2023-02-03 22:47:52 +00:00
if err != nil {
2023-03-23 18:37:26 +00:00
startlog . Fatalf ( "could not get rest.TransportConfig(): %v" , err )
}
// Kubernetes uses SPDY for exec and port-forward, however SPDY is
// incompatible with HTTP/2; so disable HTTP/2 in the proxy.
tr := http . DefaultTransport . ( * http . Transport ) . Clone ( )
tr . TLSClientConfig , err = transport . TLSConfigFor ( cfg )
if err != nil {
startlog . Fatalf ( "could not get transport.TLSConfigFor(): %v" , err )
}
tr . TLSNextProto = make ( map [ string ] func ( authority string , c * tls . Conn ) http . RoundTripper )
rt , err := transport . HTTPWrappersForConfig ( cfg , tr )
if err != nil {
startlog . Fatalf ( "could not get rest.TransportConfig(): %v" , err )
2023-02-03 22:47:52 +00:00
}
2023-03-13 19:06:24 +00:00
go runAuthProxy ( s , rt , zlog . Named ( "auth-proxy" ) . Infof )
2023-02-03 22:47:52 +00:00
}
2022-12-12 19:15:34 +00:00
if err := mgr . Start ( signals . SetupSignalHandler ( ) ) ; err != nil {
2022-12-13 23:37:35 +00:00
startlog . Fatalf ( "could not start manager: %v" , err )
2022-12-12 19:15:34 +00:00
}
}
// Labels set on every child resource the operator creates, used to find the
// children of a Service and to map child events back to their parent.
const (
	LabelManaged         = "tailscale.com/managed"
	LabelParentType      = "tailscale.com/parent-resource-type"
	LabelParentName      = "tailscale.com/parent-resource"
	LabelParentNamespace = "tailscale.com/parent-resource-ns"
)

// FinalizerName is the finalizer the operator adds to Services it exposes, so
// that cleanup can run before the Service is removed.
const FinalizerName = "tailscale.com/finalizer"

// Annotations read from Services to control how they are exposed.
const (
	AnnotationExpose   = "tailscale.com/expose"
	AnnotationTags     = "tailscale.com/tags"
	AnnotationHostname = "tailscale.com/hostname"
)
// ServiceReconciler is a simple ControllerManagedBy example implementation.
type ServiceReconciler struct {
client . Client
2023-05-17 11:54:27 +01:00
tsClient tsClient
defaultTags [ ] string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
logger * zap . SugaredLogger
2022-12-12 23:37:20 +00:00
}
type tsClient interface {
2023-05-13 02:50:30 +01:00
CreateKey ( ctx context . Context , caps tailscale . KeyCapabilities ) ( string , * tailscale . Key , error )
2022-12-13 05:00:10 +00:00
DeleteDevice ( ctx context . Context , id string ) error
2022-12-12 19:15:34 +00:00
}
// childResourceLabels returns the label set that marks a resource as a
// managed child of the given parent Service.
//
// Owner references would be the natural tool for this, but Kubernetes
// deliberately does not support cross-namespace ownership. The exposed
// Service therefore cannot own the proxy resources, and the operator does its
// own filtering and tracking through these labels instead.
func childResourceLabels(parent *corev1.Service) map[string]string {
	labels := make(map[string]string, 4)
	labels[LabelManaged] = "true"
	labels[LabelParentType] = "svc"
	labels[LabelParentName] = parent.GetName()
	labels[LabelParentNamespace] = parent.GetNamespace()
	return labels
}
2022-12-14 19:20:59 +00:00
// Reconcile brings the cluster state for the Service named by req in line
// with what the Service asks for: a Service that is being deleted, or that
// should not be exposed, is cleaned up; any other Service is provisioned.
// A Service that no longer exists is treated as already handled.
func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
	lg := a.logger.With("service-ns", req.Namespace, "service-name", req.Name)
	lg.Debugf("starting reconcile")
	defer lg.Debugf("reconcile finished")

	var svc corev1.Service
	switch err = a.Get(ctx, req.NamespacedName, &svc); {
	case apierrors.IsNotFound(err):
		// The object may have been deleted after the request was queued.
		lg.Debugf("service not found, assuming it was deleted")
		return reconcile.Result{}, nil
	case err != nil:
		return reconcile.Result{}, fmt.Errorf("failed to get svc: %w", err)
	}

	if svc.DeletionTimestamp.IsZero() && a.shouldExpose(&svc) {
		return reconcile.Result{}, a.maybeProvision(ctx, lg, &svc)
	}

	lg.Debugf("service is being deleted or should not be exposed, cleaning up")
	return reconcile.Result{}, a.maybeCleanup(ctx, lg, &svc)
}
// maybeCleanup removes any existing resources related to serving svc over tailscale.
2022-12-12 19:15:34 +00:00
//
// This function is responsible for removing the finalizer from the service,
// once all associated resources are gone.
2022-12-14 19:20:59 +00:00
func ( a * ServiceReconciler ) maybeCleanup ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) error {
2022-12-12 19:15:34 +00:00
ix := slices . Index ( svc . Finalizers , FinalizerName )
if ix < 0 {
2022-12-13 23:37:35 +00:00
logger . Debugf ( "no finalizer, nothing to do" )
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
ml := childResourceLabels ( svc )
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
2022-12-13 05:00:10 +00:00
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , ml )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "getting statefulset: %w" , err )
2022-12-12 19:15:34 +00:00
}
if sts != nil {
if ! sts . GetDeletionTimestamp ( ) . IsZero ( ) {
2022-12-13 21:32:05 +00:00
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
2022-12-13 23:37:35 +00:00
logger . Debugf ( "waiting for statefulset %s/%s deletion" , sts . GetNamespace ( ) , sts . GetName ( ) )
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
2022-12-13 05:00:10 +00:00
err := a . DeleteAllOf ( ctx , & appsv1 . StatefulSet { } , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( ml ) , client . PropagationPolicy ( metav1 . DeletePropagationForeground ) )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "deleting statefulset: %w" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-13 23:37:35 +00:00
logger . Debugf ( "started deletion of statefulset %s/%s" , sts . GetNamespace ( ) , sts . GetName ( ) )
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
id , _ , err := a . getDeviceInfo ( ctx , svc )
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "getting device info: %w" , err )
2022-12-12 19:15:34 +00:00
}
if id != "" {
// TODO: handle case where the device is already deleted, but the secret
// is still around.
if err := a . tsClient . DeleteDevice ( ctx , id ) ; err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "deleting device: %w" , err )
2022-12-12 19:15:34 +00:00
}
}
types := [ ] client . Object {
& corev1 . Service { } ,
& corev1 . Secret { } ,
}
for _ , typ := range types {
2022-12-13 05:00:10 +00:00
if err := a . DeleteAllOf ( ctx , typ , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( ml ) ) ; err != nil {
2022-12-14 19:20:59 +00:00
return err
2022-12-12 19:15:34 +00:00
}
}
svc . Finalizers = append ( svc . Finalizers [ : ix ] , svc . Finalizers [ ix + 1 : ] ... )
if err := a . Update ( ctx , svc ) ; err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to remove finalizer: %w" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-13 23:37:35 +00:00
// Unlike most log entries in the reconcile loop, this will get printed
// exactly once at the very end of cleanup, because the final step of
// cleanup removes the tailscale finalizer, which will make all future
// reconciles exit early.
logger . Infof ( "unexposed service from tailnet" )
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
2022-12-14 19:20:59 +00:00
// maybeProvision ensures that svc is exposed over tailscale, taking any actions
// necessary to reach that state.
//
// This function adds a finalizer to svc, ensuring that we can handle orderly
// deprovisioning later.
func ( a * ServiceReconciler ) maybeProvision ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) error {
2023-01-25 18:16:59 +00:00
hostname , err := nameForService ( svc )
if err != nil {
return err
}
2022-12-12 19:15:34 +00:00
if ! slices . Contains ( svc . Finalizers , FinalizerName ) {
2022-12-13 23:37:35 +00:00
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
// this is a nice place to tell the operator that the high level,
// multi-reconcile operation is underway.
logger . Infof ( "exposing service over tailscale" )
2022-12-12 19:15:34 +00:00
svc . Finalizers = append ( svc . Finalizers , FinalizerName )
if err := a . Update ( ctx , svc ) ; err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to add finalizer: %w" , err )
2022-12-12 19:15:34 +00:00
}
}
// Do full reconcile.
2022-12-13 23:37:35 +00:00
hsvc , err := a . reconcileHeadlessService ( ctx , logger , svc )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to reconcile headless service: %w" , err )
2022-12-12 19:15:34 +00:00
}
tags := a . defaultTags
if tstr , ok := svc . Annotations [ AnnotationTags ] ; ok {
tags = strings . Split ( tstr , "," )
}
2022-12-13 23:37:35 +00:00
secretName , err := a . createOrGetSecret ( ctx , logger , svc , hsvc , tags )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to create or get API key secret: %w" , err )
2022-12-12 19:15:34 +00:00
}
2023-01-25 18:16:59 +00:00
_ , err = a . reconcileSTS ( ctx , logger , svc , hsvc , secretName , hostname )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to reconcile statefulset: %w" , err )
2022-12-12 19:15:34 +00:00
}
if ! a . hasLoadBalancerClass ( svc ) {
2022-12-13 23:37:35 +00:00
logger . Debugf ( "service is not a LoadBalancer, so not updating ingress" )
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
_ , tsHost , err := a . getDeviceInfo ( ctx , svc )
if err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to get device ID: %w" , err )
2022-12-12 19:15:34 +00:00
}
if tsHost == "" {
2022-12-13 23:37:35 +00:00
logger . Debugf ( "no Tailscale hostname known yet, waiting for proxy pod to finish auth" )
2022-12-12 19:15:34 +00:00
// No hostname yet. Wait for the proxy pod to auth.
svc . Status . LoadBalancer . Ingress = nil
if err := a . Status ( ) . Update ( ctx , svc ) ; err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to update service status: %w" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-14 19:20:59 +00:00
return nil
2022-12-12 19:15:34 +00:00
}
2022-12-13 23:37:35 +00:00
logger . Debugf ( "setting ingress hostname to %q" , tsHost )
2022-12-12 19:15:34 +00:00
svc . Status . LoadBalancer . Ingress = [ ] corev1 . LoadBalancerIngress {
{
Hostname : tsHost ,
} ,
}
if err := a . Status ( ) . Update ( ctx , svc ) ; err != nil {
2022-12-14 19:20:59 +00:00
return fmt . Errorf ( "failed to update service status: %w" , err )
2022-12-12 19:15:34 +00:00
}
2022-12-14 19:20:59 +00:00
return nil
}
// shouldExpose reports whether svc is to be exposed over tailscale: it must
// have a ClusterIP and either use the tailscale LoadBalancer class or carry
// the expose annotation.
func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool {
	switch svc.Spec.ClusterIP {
	case "", "None":
		// Headless services have no ClusterIP to forward to, so they cannot
		// be exposed.
		return false
	}
	return a.hasLoadBalancerClass(svc) || a.hasAnnotation(svc)
}
// hasLoadBalancerClass reports whether svc is a non-nil Service of type
// LoadBalancer whose LoadBalancerClass is set to "tailscale".
func (a *ServiceReconciler) hasLoadBalancerClass(svc *corev1.Service) bool {
	if svc == nil || svc.Spec.Type != corev1.ServiceTypeLoadBalancer {
		return false
	}
	class := svc.Spec.LoadBalancerClass
	return class != nil && *class == "tailscale"
}
func ( a * ServiceReconciler ) hasAnnotation ( svc * corev1 . Service ) bool {
return svc != nil &&
svc . Annotations [ AnnotationExpose ] == "true"
2022-12-12 19:15:34 +00:00
}
2022-12-13 23:37:35 +00:00
func ( a * ServiceReconciler ) reconcileHeadlessService ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) ( * corev1 . Service , error ) {
2022-12-12 19:15:34 +00:00
hsvc := & corev1 . Service {
ObjectMeta : metav1 . ObjectMeta {
GenerateName : "ts-" + svc . Name + "-" ,
2022-12-13 05:00:10 +00:00
Namespace : a . operatorNamespace ,
2022-12-12 19:15:34 +00:00
Labels : childResourceLabels ( svc ) ,
} ,
Spec : corev1 . ServiceSpec {
ClusterIP : "None" ,
Selector : map [ string ] string {
"app" : string ( svc . UID ) ,
} ,
} ,
}
2022-12-13 23:37:35 +00:00
logger . Debugf ( "reconciling headless service for StatefulSet" )
2022-12-13 05:00:10 +00:00
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , hsvc , func ( svc * corev1 . Service ) { svc . Spec = hsvc . Spec } )
2022-12-12 19:15:34 +00:00
}
2022-12-13 23:37:35 +00:00
func ( a * ServiceReconciler ) createOrGetSecret ( ctx context . Context , logger * zap . SugaredLogger , svc , hsvc * corev1 . Service , tags [ ] string ) ( string , error ) {
2022-12-12 19:15:34 +00:00
secret := & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
// Hardcode a -0 suffix so that in future, if we support
// multiple StatefulSet replicas, we can provision -N for
// those.
Name : hsvc . Name + "-0" ,
2022-12-13 05:00:10 +00:00
Namespace : a . operatorNamespace ,
2022-12-12 19:15:34 +00:00
Labels : childResourceLabels ( svc ) ,
} ,
}
if err := a . Get ( ctx , client . ObjectKeyFromObject ( secret ) , secret ) ; err == nil {
2022-12-13 23:37:35 +00:00
logger . Debugf ( "secret %s/%s already exists" , secret . GetNamespace ( ) , secret . GetName ( ) )
2022-12-12 19:15:34 +00:00
return secret . Name , nil
} else if ! apierrors . IsNotFound ( err ) {
return "" , err
}
// Secret doesn't exist yet, create one. Initially it contains
// only the Tailscale authkey, but once Tailscale starts it'll
// also store the daemon state.
2022-12-13 05:00:10 +00:00
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , childResourceLabels ( svc ) )
2022-12-12 19:15:34 +00:00
if err != nil {
return "" , err
}
if sts != nil {
// StatefulSet exists, so we have already created the secret.
// If the secret is missing, they should delete the StatefulSet.
2022-12-13 23:37:35 +00:00
logger . Errorf ( "Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet." , sts . GetNamespace ( ) , sts . GetName ( ) )
2022-12-12 19:15:34 +00:00
return "" , nil
}
// Create API Key secret which is going to be used by the statefulset
// to authenticate with Tailscale.
2022-12-13 23:37:35 +00:00
logger . Debugf ( "creating authkey for new tailscale proxy" )
2022-12-12 19:15:34 +00:00
authKey , err := a . newAuthKey ( ctx , tags )
if err != nil {
return "" , err
}
secret . StringData = map [ string ] string {
2022-12-12 23:37:20 +00:00
"authkey" : authKey ,
2022-12-12 19:15:34 +00:00
}
if err := a . Create ( ctx , secret ) ; err != nil {
return "" , err
}
return secret . Name , nil
}
func ( a * ServiceReconciler ) getDeviceInfo ( ctx context . Context , svc * corev1 . Service ) ( id , hostname string , err error ) {
2022-12-13 05:00:10 +00:00
sec , err := getSingleObject [ corev1 . Secret ] ( ctx , a . Client , a . operatorNamespace , childResourceLabels ( svc ) )
2022-12-12 19:15:34 +00:00
if err != nil {
return "" , "" , err
}
2023-05-12 02:21:23 +01:00
if sec == nil {
return "" , "" , nil
}
2022-12-12 19:15:34 +00:00
id = string ( sec . Data [ "device_id" ] )
if id == "" {
return "" , "" , nil
}
// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
// to remove it.
hostname = strings . TrimSuffix ( string ( sec . Data [ "device_fqdn" ] ) , "." )
if hostname == "" {
return "" , "" , nil
}
return id , hostname , nil
}
2022-12-12 23:37:20 +00:00
func ( a * ServiceReconciler ) newAuthKey ( ctx context . Context , tags [ ] string ) ( string , error ) {
caps := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Preauthorized : true ,
Tags : tags ,
} ,
} ,
2022-12-12 19:15:34 +00:00
}
2023-05-12 06:05:18 +01:00
2023-05-13 02:50:30 +01:00
key , _ , err := a . tsClient . CreateKey ( ctx , caps )
2022-12-12 19:15:34 +00:00
if err != nil {
2022-12-12 23:37:20 +00:00
return "" , err
2022-12-12 19:15:34 +00:00
}
2022-12-12 23:37:20 +00:00
return key , nil
2022-12-12 19:15:34 +00:00
}
// proxyYaml holds the raw bytes of manifests/proxy.yaml, embedded at build
// time. reconcileSTS unmarshals it into an appsv1.StatefulSet and uses the
// result as the starting point for each proxy StatefulSet.
//
//go:embed manifests/proxy.yaml
var proxyYaml [ ] byte
2023-01-25 18:16:59 +00:00
func ( a * ServiceReconciler ) reconcileSTS ( ctx context . Context , logger * zap . SugaredLogger , parentSvc , headlessSvc * corev1 . Service , authKeySecret , hostname string ) ( * appsv1 . StatefulSet , error ) {
2022-12-12 19:15:34 +00:00
var ss appsv1 . StatefulSet
if err := yaml . Unmarshal ( proxyYaml , & ss ) ; err != nil {
return nil , fmt . Errorf ( "failed to unmarshal proxy spec: %w" , err )
}
container := & ss . Spec . Template . Spec . Containers [ 0 ]
2022-12-13 05:00:10 +00:00
container . Image = a . proxyImage
2022-12-12 19:15:34 +00:00
container . Env = append ( container . Env ,
corev1 . EnvVar {
Name : "TS_DEST_IP" ,
Value : parentSvc . Spec . ClusterIP ,
} ,
corev1 . EnvVar {
Name : "TS_KUBE_SECRET" ,
Value : authKeySecret ,
2023-01-25 18:16:59 +00:00
} ,
corev1 . EnvVar {
Name : "TS_HOSTNAME" ,
Value : hostname ,
2022-12-12 19:15:34 +00:00
} )
ss . ObjectMeta = metav1 . ObjectMeta {
Name : headlessSvc . Name ,
2022-12-13 05:00:10 +00:00
Namespace : a . operatorNamespace ,
2022-12-12 19:15:34 +00:00
Labels : childResourceLabels ( parentSvc ) ,
}
ss . Spec . ServiceName = headlessSvc . Name
ss . Spec . Selector = & metav1 . LabelSelector {
MatchLabels : map [ string ] string {
"app" : string ( parentSvc . UID ) ,
} ,
}
ss . Spec . Template . ObjectMeta . Labels = map [ string ] string {
"app" : string ( parentSvc . UID ) ,
}
2023-05-17 11:54:27 +01:00
ss . Spec . Template . Spec . PriorityClassName = a . proxyPriorityClassName
2022-12-13 23:37:35 +00:00
logger . Debugf ( "reconciling statefulset %s/%s" , ss . GetNamespace ( ) , ss . GetName ( ) )
2022-12-13 05:00:10 +00:00
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , & ss , func ( s * appsv1 . StatefulSet ) { s . Spec = ss . Spec } )
2022-12-12 19:15:34 +00:00
}
// ptrObject constrains a type parameter to be the pointer type *T and to
// implement client.Object. It lets generic helpers allocate a T with new and
// still use the result as a client.Object.
type ptrObject[T any] interface {
	*T
	client.Object
}
// createOrUpdate adds obj to the k8s cluster, unless the object already exists,
// in which case update is called to make changes to it. If update is nil, the
// existing object is returned unmodified.
//
// obj is looked up by its Name and Namespace if Name is set, otherwise it's
// looked up by labels.
2022-12-13 05:00:10 +00:00
func createOrUpdate [ T any , O ptrObject [ T ] ] ( ctx context . Context , c client . Client , ns string , obj O , update func ( O ) ) ( O , error ) {
2022-12-12 19:15:34 +00:00
var (
existing O
err error
)
if obj . GetName ( ) != "" {
existing = new ( T )
existing . SetName ( obj . GetName ( ) )
existing . SetNamespace ( obj . GetNamespace ( ) )
err = c . Get ( ctx , client . ObjectKeyFromObject ( obj ) , existing )
} else {
2022-12-13 05:00:10 +00:00
existing , err = getSingleObject [ T , O ] ( ctx , c , ns , obj . GetLabels ( ) )
2022-12-12 19:15:34 +00:00
}
if err == nil && existing != nil {
if update != nil {
update ( existing )
if err := c . Update ( ctx , existing ) ; err != nil {
return nil , err
}
}
return existing , nil
}
if err != nil && ! apierrors . IsNotFound ( err ) {
return nil , fmt . Errorf ( "failed to get object: %w" , err )
}
if err := c . Create ( ctx , obj ) ; err != nil {
return nil , err
}
return obj , nil
}
// getSingleObject searches for k8s objects of type T
// (e.g. corev1.Service) with the given labels, and returns
// it. Returns nil if no objects match the labels, and an error if
// more than one object matches.
2022-12-13 05:00:10 +00:00
func getSingleObject [ T any , O ptrObject [ T ] ] ( ctx context . Context , c client . Client , ns string , labels map [ string ] string ) ( O , error ) {
2022-12-12 19:15:34 +00:00
ret := O ( new ( T ) )
kinds , _ , err := c . Scheme ( ) . ObjectKinds ( ret )
if err != nil {
return nil , err
}
if len ( kinds ) != 1 {
// TODO: the runtime package apparently has a "pick the best
// GVK" function somewhere that might be good enough?
return nil , fmt . Errorf ( "more than 1 GroupVersionKind for %T" , ret )
}
gvk := kinds [ 0 ]
gvk . Kind += "List"
lst := unstructured . UnstructuredList { }
lst . SetGroupVersionKind ( gvk )
2022-12-13 05:00:10 +00:00
if err := c . List ( ctx , & lst , client . InNamespace ( ns ) , client . MatchingLabels ( labels ) ) ; err != nil {
2022-12-12 19:15:34 +00:00
return nil , err
}
if len ( lst . Items ) == 0 {
return nil , nil
}
if len ( lst . Items ) > 1 {
return nil , fmt . Errorf ( "found multiple matching %T objects" , ret )
}
if err := c . Scheme ( ) . Convert ( & lst . Items [ 0 ] , ret , nil ) ; err != nil {
return nil , err
}
return ret , nil
}
2023-02-23 02:26:17 +00:00
// defaultBool returns the boolean value of the environment variable envName.
// It returns defVal when the variable is unset or empty, and also when its
// value cannot be interpreted as a boolean, so that a malformed setting falls
// back to the default rather than silently becoming false.
func defaultBool(envName string, defVal bool) bool {
	vs := os.Getenv(envName)
	if vs == "" {
		return defVal
	}
	v, ok := opt.Bool(vs).Get()
	if !ok {
		return defVal
	}
	return v
}
2022-12-12 19:15:34 +00:00
// defaultEnv returns the value of the environment variable envName, or defVal
// when the variable is unset or set to the empty string.
func defaultEnv(envName, defVal string) string {
	if val := os.Getenv(envName); val != "" {
		return val
	}
	return defVal
}
2023-01-25 18:16:59 +00:00
// nameForService picks the Tailscale hostname for svc's proxy. A hostname
// annotation on the Service wins, provided it is a valid DNS label; otherwise
// the name is "<namespace>-<name>". An invalid annotation value is reported
// as an error.
func nameForService(svc *corev1.Service) (string, error) {
	requested, ok := svc.Annotations[AnnotationHostname]
	if !ok {
		return svc.Namespace + "-" + svc.Name, nil
	}
	if err := dnsname.ValidLabel(requested); err != nil {
		return "", fmt.Errorf("invalid Tailscale hostname %q: %w", requested, err)
	}
	return requested, nil
}