2023-08-23 16:35:12 +01:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2023-08-24 23:02:42 +01:00
//go:build !plan9
2023-08-23 16:35:12 +01:00
package main
import (
"context"
2024-01-09 14:13:22 +00:00
"crypto/sha256"
2023-08-23 16:35:12 +01:00
_ "embed"
2023-08-24 20:18:17 +01:00
"encoding/json"
2023-10-23 16:22:55 +01:00
"errors"
2023-08-23 16:35:12 +01:00
"fmt"
2023-10-23 16:22:55 +01:00
"net/http"
2023-08-23 16:35:12 +01:00
"os"
2024-02-13 05:27:54 +00:00
"slices"
2023-08-23 16:35:12 +01:00
"strings"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2024-02-13 05:27:54 +00:00
"k8s.io/apimachinery/pkg/types"
2023-11-21 10:20:37 +00:00
"k8s.io/apiserver/pkg/storage/names"
2023-08-23 16:35:12 +01:00
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"
"tailscale.com/client/tailscale"
2023-08-24 20:18:17 +01:00
"tailscale.com/ipn"
2024-02-13 05:27:54 +00:00
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
2024-09-08 20:57:29 +01:00
"tailscale.com/kube/kubetypes"
2024-01-09 14:13:22 +00:00
"tailscale.com/net/netutil"
2023-08-23 16:35:12 +01:00
"tailscale.com/tailcfg"
"tailscale.com/types/opt"
2024-03-26 16:20:32 +00:00
"tailscale.com/types/ptr"
2023-08-24 20:18:17 +01:00
"tailscale.com/util/mak"
2023-08-23 16:35:12 +01:00
)
const (
	// Labels that the operator sets on StatefulSets and Pods. If you add a
	// new label here, also add it to the tailscaleManagedLabels var to
	// ensure that it does not get overwritten by ProxyClass configuration.
	LabelManaged         = "tailscale.com/managed"
	LabelParentType      = "tailscale.com/parent-resource-type"
	LabelParentName      = "tailscale.com/parent-resource"
	LabelParentNamespace = "tailscale.com/parent-resource-ns"
	labelSecretType      = "tailscale.com/secret-type" // "config" or "state".

	// LabelProxyClass can be set by users on tailscale Ingresses and
	// Services that define cluster ingress or cluster egress, to specify
	// that configuration in this ProxyClass should be applied to resources
	// created for the Ingress or Service.
	LabelProxyClass = "tailscale.com/proxy-class"

	FinalizerName = "tailscale.com/finalizer"

	// Annotations settable by users on services.
	AnnotationExpose             = "tailscale.com/expose"
	AnnotationTags               = "tailscale.com/tags"
	AnnotationHostname           = "tailscale.com/hostname"
	annotationTailnetTargetIPOld = "tailscale.com/ts-tailnet-target-ip"
	AnnotationTailnetTargetIP    = "tailscale.com/tailnet-ip"
	// MagicDNS name of tailnet node.
	AnnotationTailnetTargetFQDN = "tailscale.com/tailnet-fqdn"

	AnnotationProxyGroup = "tailscale.com/proxy-group"

	// Annotations settable by users on ingresses.
	AnnotationFunnel = "tailscale.com/funnel"

	// If set to true, set up iptables/nftables rules in the proxy to
	// forward cluster traffic to the tailnet IP of that proxy. This can
	// only be set on an Ingress. This is useful in cases where a cluster
	// target needs to be able to reach a cluster workload exposed to the
	// tailnet via Ingress using the same hostname as a tailnet workload (in
	// this case, the MagicDNS name of the ingress proxy). This annotation
	// is experimental. If it is set to true, the proxy set up for the
	// Ingress will run tailscale in non-userspace mode, with the NET_ADMIN
	// capability for the tailscale container, and will also run a
	// privileged init container that enables forwarding.
	// Eventually this behaviour might become the default.
	AnnotationExperimentalForwardClusterTrafficViaL7IngresProxy = "tailscale.com/experimental-forward-cluster-traffic-via-ingress"

	// Annotations set by the operator on pods to trigger restarts when the
	// hostname, IP, FQDN or tailscaled config changes. If you add a new
	// annotation here, also add it to the tailscaleManagedAnnotations var
	// to ensure that it does not get removed when a ProxyClass
	// configuration is applied.
	podAnnotationLastSetClusterIP         = "tailscale.com/operator-last-set-cluster-ip"
	podAnnotationLastSetClusterDNSName    = "tailscale.com/operator-last-set-cluster-dns-name"
	podAnnotationLastSetTailnetTargetIP   = "tailscale.com/operator-last-set-ts-tailnet-target-ip"
	podAnnotationLastSetTailnetTargetFQDN = "tailscale.com/operator-last-set-ts-tailnet-target-fqdn"
	// podAnnotationLastSetConfigFileHash is the sha256 hash of the current
	// tailscaled configuration contents.
	podAnnotationLastSetConfigFileHash = "tailscale.com/operator-last-set-config-file-hash"
)
2024-02-13 05:27:54 +00:00
var (
// tailscaleManagedLabels are label keys that tailscale operator sets on StatefulSets and Pods.
tailscaleManagedLabels = [ ] string { LabelManaged , LabelParentType , LabelParentName , LabelParentNamespace , "app" }
// tailscaleManagedAnnotations are annotation keys that tailscale operator sets on StatefulSets and Pods.
2024-02-27 15:14:09 +00:00
tailscaleManagedAnnotations = [ ] string { podAnnotationLastSetClusterIP , podAnnotationLastSetTailnetTargetIP , podAnnotationLastSetTailnetTargetFQDN , podAnnotationLastSetConfigFileHash }
2024-02-13 05:27:54 +00:00
)
2023-08-23 16:35:12 +01:00
type tailscaleSTSConfig struct {
ParentResourceName string
ParentResourceUID string
ChildResourceLabels map [ string ] string
2024-04-23 17:30:00 +01:00
ServeConfig * ipn . ServeConfig // if serve config is set, this is a proxy for Ingress
ClusterTargetIP string // ingress target IP
ClusterTargetDNSName string // ingress target DNS name
2024-02-08 06:45:42 +00:00
// If set to true, operator should configure containerboot to forward
// cluster traffic via the proxy set up for Kubernetes Ingress.
ForwardClusterTrafficViaL7IngressProxy bool
2023-08-30 08:31:37 +01:00
2024-01-09 14:13:22 +00:00
TailnetTargetIP string // egress target IP
2023-08-23 16:35:12 +01:00
2024-01-09 14:13:22 +00:00
TailnetTargetFQDN string // egress target FQDN
2023-11-24 16:24:48 +00:00
2023-08-23 16:35:12 +01:00
Hostname string
Tags [ ] string // if empty, use defaultTags
2023-12-14 13:51:59 +00:00
2024-01-09 14:13:22 +00:00
// Connector specifies a configuration of a Connector instance if that's
// what this StatefulSet should be created for.
Connector * connector
2024-02-13 05:27:54 +00:00
2024-06-07 19:56:42 +01:00
ProxyClassName string // name of ProxyClass if one needs to be applied to the proxy
ProxyClass * tsapi . ProxyClass // ProxyClass that needs to be applied to the proxy (if there is one)
2024-01-09 14:13:22 +00:00
}
// connector holds the Connector-specific settings for a proxy.
type connector struct {
	// routes is a list of subnet routes that this Connector should expose.
	routes string
	// isExitNode defines whether this Connector should act as an exit node.
	isExitNode bool
}
2024-02-08 06:45:42 +00:00
// tsnetServer is the small part of the operator's tsnet server that the
// StatefulSet reconciler relies on.
type tsnetServer interface {
	// CertDomains returns a list of domain names. The reconciler treats a
	// non-empty result as HTTPS being enabled on the tailnet.
	CertDomains() []string
}
2023-08-23 16:35:12 +01:00
type tailscaleSTSReconciler struct {
client . Client
2024-02-08 06:45:42 +00:00
tsnetServer tsnetServer
2023-08-23 16:35:12 +01:00
tsClient tsClient
defaultTags [ ] string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
2023-10-17 18:05:02 +01:00
tsFirewallMode string
}
func ( sts tailscaleSTSReconciler ) validate ( ) error {
if sts . tsFirewallMode != "" && ! isValidFirewallMode ( sts . tsFirewallMode ) {
return fmt . Errorf ( "invalid proxy firewall mode %s, valid modes are iptables, nftables or unset" , sts . tsFirewallMode )
}
return nil
2023-08-23 16:35:12 +01:00
}
2023-08-30 18:37:51 +01:00
// IsHTTPSEnabledOnTailnet reports whether HTTPS is enabled on the tailnet,
// which is the case when the tsnet server has at least one cert domain.
func (a *tailscaleSTSReconciler) IsHTTPSEnabledOnTailnet() bool {
	domains := a.tsnetServer.CertDomains()
	return len(domains) != 0
}
2023-08-23 16:35:12 +01:00
// Provision ensures that the StatefulSet for the given service is running and
// up to date.
2023-08-30 08:31:37 +01:00
func ( a * tailscaleSTSReconciler ) Provision ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) ( * corev1 . Service , error ) {
2023-08-23 16:35:12 +01:00
// Do full reconcile.
2024-01-09 14:13:22 +00:00
// TODO (don't create Service for the Connector)
2023-08-23 16:35:12 +01:00
hsvc , err := a . reconcileHeadlessService ( ctx , logger , sts )
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to reconcile headless service: %w" , err )
2023-08-23 16:35:12 +01:00
}
2024-06-07 19:56:42 +01:00
proxyClass := new ( tsapi . ProxyClass )
if sts . ProxyClassName != "" {
if err := a . Get ( ctx , types . NamespacedName { Name : sts . ProxyClassName } , proxyClass ) ; err != nil {
return nil , fmt . Errorf ( "failed to get ProxyClass: %w" , err )
}
if ! tsoperator . ProxyClassIsReady ( proxyClass ) {
logger . Infof ( "ProxyClass %s specified for the proxy, but it is not (yet) in a ready state, waiting.." )
return nil , nil
}
}
sts . ProxyClass = proxyClass
2024-05-10 16:32:37 +01:00
secretName , tsConfigHash , configs , err := a . createOrGetSecret ( ctx , logger , sts , hsvc )
2023-08-23 16:35:12 +01:00
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to create or get API key secret: %w" , err )
2023-08-23 16:35:12 +01:00
}
2024-05-10 16:32:37 +01:00
_ , err = a . reconcileSTS ( ctx , logger , sts , hsvc , secretName , tsConfigHash , configs )
2023-08-23 16:35:12 +01:00
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to reconcile statefulset: %w" , err )
2023-08-23 16:35:12 +01:00
}
2023-08-30 08:31:37 +01:00
return hsvc , nil
2023-08-23 16:35:12 +01:00
}
// Cleanup removes all resources associated that were created by Provision with
// the given labels. It returns true when all resources have been removed,
// otherwise it returns false and the caller should retry later.
func ( a * tailscaleSTSReconciler ) Cleanup ( ctx context . Context , logger * zap . SugaredLogger , labels map [ string ] string ) ( done bool , _ error ) {
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , labels )
if err != nil {
return false , fmt . Errorf ( "getting statefulset: %w" , err )
}
if sts != nil {
if ! sts . GetDeletionTimestamp ( ) . IsZero ( ) {
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
logger . Debugf ( "waiting for statefulset %s/%s deletion" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
err := a . DeleteAllOf ( ctx , & appsv1 . StatefulSet { } , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) , client . PropagationPolicy ( metav1 . DeletePropagationForeground ) )
if err != nil {
return false , fmt . Errorf ( "deleting statefulset: %w" , err )
}
logger . Debugf ( "started deletion of statefulset %s/%s" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
2023-08-09 00:03:08 +01:00
id , _ , _ , err := a . DeviceInfo ( ctx , labels )
2023-08-23 16:35:12 +01:00
if err != nil {
return false , fmt . Errorf ( "getting device info: %w" , err )
}
if id != "" {
2023-10-23 16:22:55 +01:00
logger . Debugf ( "deleting device %s from control" , string ( id ) )
2023-08-23 16:35:12 +01:00
if err := a . tsClient . DeleteDevice ( ctx , string ( id ) ) ; err != nil {
2023-10-23 16:22:55 +01:00
errResp := & tailscale . ErrResponse { }
if ok := errors . As ( err , errResp ) ; ok && errResp . Status == http . StatusNotFound {
logger . Debugf ( "device %s not found, likely because it has already been deleted from control" , string ( id ) )
} else {
return false , fmt . Errorf ( "deleting device: %w" , err )
}
} else {
logger . Debugf ( "device %s deleted from control" , string ( id ) )
2023-08-23 16:35:12 +01:00
}
}
types := [ ] client . Object {
& corev1 . Service { } ,
& corev1 . Secret { } ,
}
for _ , typ := range types {
if err := a . DeleteAllOf ( ctx , typ , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return false , err
}
}
return true , nil
}
2023-11-21 10:20:37 +00:00
// maxStatefulSetNameLength is the longest name a StatefulSet may have without
// producing an over-long controller-revision-hash label value
// (see https://github.com/kubernetes/kubernetes/issues/64023).
//
// That label value is built as <StatefulSet name> + "-" + <revision hash>.
// Label values are capped at 63 chars and the revision hash is 10 chars long,
// which leaves 63 - (10 + 1) chars for the name.
// https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
// https://github.com/kubernetes/kubernetes/blob/v1.28.4/pkg/controller/history/controller_history.go#L90-L104
const maxStatefulSetNameLength = 63 - (10 + 1)
// statefulSetNameBase accepts name of parent resource and returns a string in
// form ts-<portion-of-parentname>- that, when passed to Kubernetes name
// generation will NOT result in a StatefulSet name longer than 52 chars.
// This is done because of https://github.com/kubernetes/kubernetes/issues/64023.
func statefulSetNameBase ( parent string ) string {
base := fmt . Sprintf ( "ts-%s-" , parent )
generator := names . SimpleNameGenerator
2024-01-11 20:02:03 +00:00
for {
generatedName := generator . GenerateName ( base )
excess := len ( generatedName ) - maxStatefulSetNameLength
if excess <= 0 {
return base
}
2024-01-12 10:08:22 +00:00
base = base [ : len ( base ) - 1 - excess ] // cut off the excess chars
base = base + "-" // re-instate the dash
2023-11-21 10:20:37 +00:00
}
}
2023-08-23 16:35:12 +01:00
func ( a * tailscaleSTSReconciler ) reconcileHeadlessService ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) ( * corev1 . Service , error ) {
2023-11-21 10:20:37 +00:00
nameBase := statefulSetNameBase ( sts . ParentResourceName )
2023-08-23 16:35:12 +01:00
hsvc := & corev1 . Service {
ObjectMeta : metav1 . ObjectMeta {
2023-11-21 10:20:37 +00:00
GenerateName : nameBase ,
2023-08-23 16:35:12 +01:00
Namespace : a . operatorNamespace ,
Labels : sts . ChildResourceLabels ,
} ,
Spec : corev1 . ServiceSpec {
ClusterIP : "None" ,
Selector : map [ string ] string {
"app" : sts . ParentResourceUID ,
} ,
2024-07-05 12:21:48 +01:00
IPFamilyPolicy : ptr . To ( corev1 . IPFamilyPolicyPreferDualStack ) ,
2023-08-23 16:35:12 +01:00
} ,
}
logger . Debugf ( "reconciling headless service for StatefulSet" )
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , hsvc , func ( svc * corev1 . Service ) { svc . Spec = hsvc . Spec } )
}
2024-10-07 14:58:45 +01:00
func ( a * tailscaleSTSReconciler ) createOrGetSecret ( ctx context . Context , logger * zap . SugaredLogger , stsC * tailscaleSTSConfig , hsvc * corev1 . Service ) ( secretName , hash string , configs tailscaledConfigs , _ error ) {
2023-08-23 16:35:12 +01:00
secret := & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
// Hardcode a -0 suffix so that in future, if we support
// multiple StatefulSet replicas, we can provision -N for
// those.
Name : hsvc . Name + "-0" ,
Namespace : a . operatorNamespace ,
Labels : stsC . ChildResourceLabels ,
} ,
}
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
var orig * corev1 . Secret // unmodified copy of secret
2023-08-23 16:35:12 +01:00
if err := a . Get ( ctx , client . ObjectKeyFromObject ( secret ) , secret ) ; err == nil {
logger . Debugf ( "secret %s/%s already exists" , secret . GetNamespace ( ) , secret . GetName ( ) )
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
orig = secret . DeepCopy ( )
2023-08-23 16:35:12 +01:00
} else if ! apierrors . IsNotFound ( err ) {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-23 16:35:12 +01:00
}
2024-05-10 16:32:37 +01:00
var authKey string
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if orig == nil {
2024-02-27 15:14:09 +00:00
// Initially it contains only tailscaled config, but when the
// proxy starts, it will also store there the state, certs and
// ACME account key.
2023-08-24 20:18:17 +01:00
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , stsC . ChildResourceLabels )
if err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-24 20:18:17 +01:00
}
if sts != nil {
// StatefulSet exists, so we have already created the secret.
// If the secret is missing, they should delete the StatefulSet.
logger . Errorf ( "Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet." , sts . GetNamespace ( ) , sts . GetName ( ) )
2024-05-10 16:32:37 +01:00
return "" , "" , nil , nil
2023-08-24 20:18:17 +01:00
}
// Create API Key secret which is going to be used by the statefulset
// to authenticate with Tailscale.
logger . Debugf ( "creating authkey for new tailscale proxy" )
tags := stsC . Tags
if len ( tags ) == 0 {
tags = a . defaultTags
}
2024-09-11 12:19:29 +01:00
authKey , err = newAuthKey ( ctx , a . tsClient , tags )
2023-08-24 20:18:17 +01:00
if err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-24 20:18:17 +01:00
}
2024-01-09 14:13:22 +00:00
}
2024-05-10 16:32:37 +01:00
configs , err := tailscaledConfig ( stsC , authKey , orig )
2024-02-27 15:14:09 +00:00
if err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , fmt . Errorf ( "error creating tailscaled config: %w" , err )
}
hash , err = tailscaledConfigHash ( configs )
if err != nil {
return "" , "" , nil , fmt . Errorf ( "error calculating hash of tailscaled configs: %w" , err )
}
latest := tailcfg . CapabilityVersion ( - 1 )
var latestConfig ipn . ConfigVAlpha
for key , val := range configs {
2024-10-07 14:58:45 +01:00
fn := tsoperator . TailscaledConfigFileName ( key )
2024-05-10 16:32:37 +01:00
b , err := json . Marshal ( val )
if err != nil {
return "" , "" , nil , fmt . Errorf ( "error marshalling tailscaled config: %w" , err )
}
mak . Set ( & secret . StringData , fn , string ( b ) )
if key > latest {
latest = key
latestConfig = val
}
2024-01-09 14:13:22 +00:00
}
2024-02-27 15:14:09 +00:00
2023-08-24 20:18:17 +01:00
if stsC . ServeConfig != nil {
j , err := json . Marshal ( stsC . ServeConfig )
if err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-24 20:18:17 +01:00
}
mak . Set ( & secret . StringData , "serve-config" , string ( j ) )
}
2024-01-09 14:13:22 +00:00
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if orig != nil {
2024-05-10 16:32:37 +01:00
logger . Debugf ( "patching the existing proxy Secret with tailscaled config %s" , sanitizeConfigBytes ( latestConfig ) )
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if err := a . Patch ( ctx , secret , client . MergeFrom ( orig ) ) ; err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-24 20:18:17 +01:00
}
} else {
2024-05-10 16:32:37 +01:00
logger . Debugf ( "creating a new Secret for the proxy with tailscaled config %s" , sanitizeConfigBytes ( latestConfig ) )
2023-08-24 20:18:17 +01:00
if err := a . Create ( ctx , secret ) ; err != nil {
2024-05-10 16:32:37 +01:00
return "" , "" , nil , err
2023-08-24 20:18:17 +01:00
}
2023-08-23 16:35:12 +01:00
}
2024-05-10 16:32:37 +01:00
return secret . Name , hash , configs , nil
2024-03-26 16:20:32 +00:00
}
// sanitizeConfigBytes returns ipn.ConfigVAlpha in string form with redacted
// auth key.
2024-05-10 16:32:37 +01:00
func sanitizeConfigBytes ( c ipn . ConfigVAlpha ) string {
2024-03-26 16:20:32 +00:00
if c . AuthKey != nil {
c . AuthKey = ptr . To ( "**redacted**" )
}
sanitizedBytes , err := json . Marshal ( c )
if err != nil {
return "invalid config"
}
return string ( sanitizedBytes )
2023-08-23 16:35:12 +01:00
}
2024-06-17 18:50:50 +01:00
// DeviceInfo returns the device ID, hostname and IPs for the Tailscale device
// that acts as an operator proxy. It retrieves info from a Kubernetes Secret
// labeled with the provided labels.
// Either of device ID, hostname and IPs can be empty string if not found in the Secret.
2023-08-09 00:03:08 +01:00
func ( a * tailscaleSTSReconciler ) DeviceInfo ( ctx context . Context , childLabels map [ string ] string ) ( id tailcfg . StableNodeID , hostname string , ips [ ] string , err error ) {
2023-08-23 16:35:12 +01:00
sec , err := getSingleObject [ corev1 . Secret ] ( ctx , a . Client , a . operatorNamespace , childLabels )
if err != nil {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , err
2023-08-23 16:35:12 +01:00
}
if sec == nil {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
2024-09-11 12:19:29 +01:00
return deviceInfo ( sec )
}
func deviceInfo ( sec * corev1 . Secret ) ( id tailcfg . StableNodeID , hostname string , ips [ ] string , err error ) {
2023-08-23 16:35:12 +01:00
id = tailcfg . StableNodeID ( sec . Data [ "device_id" ] )
if id == "" {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
// to remove it.
hostname = strings . TrimSuffix ( string ( sec . Data [ "device_fqdn" ] ) , "." )
if hostname == "" {
2024-06-17 18:50:50 +01:00
// Device ID gets stored and retrieved in a different flow than
// FQDN and IPs. A device that acts as Kubernetes operator
// proxy, but whose route setup has failed might have an device
// ID, but no FQDN/IPs. If so, return the ID, to allow the
// operator to clean up such devices.
return id , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
2023-08-09 00:03:08 +01:00
if rawDeviceIPs , ok := sec . Data [ "device_ips" ] ; ok {
if err := json . Unmarshal ( rawDeviceIPs , & ips ) ; err != nil {
return "" , "" , nil , err
}
}
return id , hostname , ips , nil
2023-08-23 16:35:12 +01:00
}
2024-09-11 12:19:29 +01:00
func newAuthKey ( ctx context . Context , tsClient tsClient , tags [ ] string ) ( string , error ) {
2023-08-23 16:35:12 +01:00
caps := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Preauthorized : true ,
Tags : tags ,
} ,
} ,
}
2024-09-11 12:19:29 +01:00
key , _ , err := tsClient . CreateKey ( ctx , caps )
2023-08-23 16:35:12 +01:00
if err != nil {
return "" , err
}
return key , nil
}
2023-10-30 18:18:09 +00:00
//go:embed deploy/manifests/proxy.yaml
2023-08-23 16:35:12 +01:00
var proxyYaml [ ] byte
2023-10-30 18:18:09 +00:00
//go:embed deploy/manifests/userspace-proxy.yaml
2023-08-24 20:18:17 +01:00
var userspaceProxyYaml [ ] byte
2024-05-10 16:32:37 +01:00
func ( a * tailscaleSTSReconciler ) reconcileSTS ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig , headlessSvc * corev1 . Service , proxySecret , tsConfigHash string , configs map [ tailcfg . CapabilityVersion ] ipn . ConfigVAlpha ) ( * appsv1 . StatefulSet , error ) {
2024-02-13 05:27:54 +00:00
ss := new ( appsv1 . StatefulSet )
2024-02-08 06:45:42 +00:00
if sts . ServeConfig != nil && sts . ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding
2023-08-24 20:18:17 +01:00
if err := yaml . Unmarshal ( userspaceProxyYaml , & ss ) ; err != nil {
2024-02-13 05:27:54 +00:00
return nil , fmt . Errorf ( "failed to unmarshal userspace proxy spec: %v" , err )
2023-08-24 20:18:17 +01:00
}
} else {
if err := yaml . Unmarshal ( proxyYaml , & ss ) ; err != nil {
return nil , fmt . Errorf ( "failed to unmarshal proxy spec: %w" , err )
}
2023-10-17 01:25:00 +01:00
for i := range ss . Spec . Template . Spec . InitContainers {
c := & ss . Spec . Template . Spec . InitContainers [ i ]
if c . Name == "sysctler" {
c . Image = a . proxyImage
break
}
}
2023-08-23 16:35:12 +01:00
}
2024-02-13 05:27:54 +00:00
pod := & ss . Spec . Template
container := & pod . Spec . Containers [ 0 ]
2023-08-23 16:35:12 +01:00
container . Image = a . proxyImage
2024-01-09 14:13:22 +00:00
ss . ObjectMeta = metav1 . ObjectMeta {
Name : headlessSvc . Name ,
Namespace : a . operatorNamespace ,
2024-02-13 05:27:54 +00:00
}
for key , val := range sts . ChildResourceLabels {
mak . Set ( & ss . ObjectMeta . Labels , key , val )
2024-01-09 14:13:22 +00:00
}
ss . Spec . ServiceName = headlessSvc . Name
ss . Spec . Selector = & metav1 . LabelSelector {
MatchLabels : map [ string ] string {
"app" : sts . ParentResourceUID ,
} ,
}
2024-02-13 05:27:54 +00:00
mak . Set ( & pod . Labels , "app" , sts . ParentResourceUID )
2024-01-16 12:51:10 +00:00
for key , val := range sts . ChildResourceLabels {
2024-02-13 05:27:54 +00:00
pod . Labels [ key ] = val // sync StatefulSet labels to Pod to make it easier for users to select the Pod
2024-01-16 12:51:10 +00:00
}
2024-01-09 14:13:22 +00:00
// Generic containerboot configuration options.
2023-08-23 16:35:12 +01:00
container . Env = append ( container . Env ,
corev1 . EnvVar {
Name : "TS_KUBE_SECRET" ,
2024-01-09 14:13:22 +00:00
Value : proxySecret ,
2023-08-23 16:35:12 +01:00
} ,
2024-02-27 15:14:09 +00:00
corev1 . EnvVar {
2024-05-10 16:32:37 +01:00
// Old tailscaled config key is still used for backwards compatibility.
2024-02-27 15:14:09 +00:00
Name : "EXPERIMENTAL_TS_CONFIGFILE_PATH" ,
Value : "/etc/tsconfig/tailscaled" ,
} ,
2024-05-10 16:32:37 +01:00
corev1 . EnvVar {
// New style is in the form of cap-<capability-version>.hujson.
Name : "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR" ,
Value : "/etc/tsconfig" ,
} ,
2024-01-09 14:13:22 +00:00
)
2024-02-08 06:45:42 +00:00
if sts . ForwardClusterTrafficViaL7IngressProxy {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS" ,
Value : "true" ,
} )
}
2024-01-09 14:13:22 +00:00
// Configure containeboot to run tailscaled with a configfile read from the state Secret.
2024-02-27 15:14:09 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetConfigFileHash , tsConfigHash )
2024-05-10 16:32:37 +01:00
configVolume := corev1 . Volume {
2024-02-27 15:14:09 +00:00
Name : "tailscaledconfig" ,
VolumeSource : corev1 . VolumeSource {
Secret : & corev1 . SecretVolumeSource {
SecretName : proxySecret ,
2024-01-09 14:13:22 +00:00
} ,
2024-02-27 15:14:09 +00:00
} ,
2024-05-10 16:32:37 +01:00
}
pod . Spec . Volumes = append ( ss . Spec . Template . Spec . Volumes , configVolume )
2024-02-27 15:14:09 +00:00
container . VolumeMounts = append ( container . VolumeMounts , corev1 . VolumeMount {
Name : "tailscaledconfig" ,
ReadOnly : true ,
MountPath : "/etc/tsconfig" ,
} )
2024-01-09 14:13:22 +00:00
if a . tsFirewallMode != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_DEBUG_FIREWALL_MODE" ,
Value : a . tsFirewallMode ,
} )
}
2024-02-13 05:27:54 +00:00
pod . Spec . PriorityClassName = a . proxyPriorityClassName
2024-01-09 14:13:22 +00:00
// Ingress/egress proxy configuration options.
2023-08-30 08:31:37 +01:00
if sts . ClusterTargetIP != "" {
2023-08-24 20:18:17 +01:00
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_DEST_IP" ,
2023-08-30 08:31:37 +01:00
Value : sts . ClusterTargetIP ,
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetClusterIP , sts . ClusterTargetIP )
2024-04-23 17:30:00 +01:00
} else if sts . ClusterTargetDNSName != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_EXPERIMENTAL_DEST_DNS_NAME" ,
Value : sts . ClusterTargetDNSName ,
} )
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetClusterDNSName , sts . ClusterTargetDNSName )
2023-08-30 08:31:37 +01:00
} else if sts . TailnetTargetIP != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_TAILNET_TARGET_IP" ,
Value : sts . TailnetTargetIP ,
2023-08-24 20:18:17 +01:00
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetTailnetTargetIP , sts . TailnetTargetIP )
2023-11-24 16:24:48 +00:00
} else if sts . TailnetTargetFQDN != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_TAILNET_TARGET_FQDN" ,
Value : sts . TailnetTargetFQDN ,
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetTailnetTargetFQDN , sts . TailnetTargetFQDN )
2023-08-24 20:18:17 +01:00
} else if sts . ServeConfig != nil {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_SERVE_CONFIG" ,
Value : "/etc/tailscaled/serve-config" ,
} )
container . VolumeMounts = append ( container . VolumeMounts , corev1 . VolumeMount {
Name : "serve-config" ,
ReadOnly : true ,
MountPath : "/etc/tailscaled" ,
} )
2024-02-13 05:27:54 +00:00
pod . Spec . Volumes = append ( ss . Spec . Template . Spec . Volumes , corev1 . Volume {
2023-08-24 20:18:17 +01:00
Name : "serve-config" ,
VolumeSource : corev1 . VolumeSource {
Secret : & corev1 . SecretVolumeSource {
2024-01-09 14:13:22 +00:00
SecretName : proxySecret ,
2024-05-10 16:32:37 +01:00
Items : [ ] corev1 . KeyToPath { { Key : "serve-config" , Path : "serve-config" } } ,
2023-08-24 20:18:17 +01:00
} ,
} ,
} )
2023-08-23 16:35:12 +01:00
}
2024-09-08 05:48:38 +01:00
app , err := appInfoForProxy ( sts )
if err != nil {
// No need to error out if now or in future we end up in a
// situation where app info cannot be determined for one of the
// many proxy configurations that the operator can produce.
logger . Error ( "[unexpected] unable to determine proxy type" )
} else {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_INTERNAL_APP" ,
Value : app ,
} )
}
2024-01-09 14:13:22 +00:00
logger . Debugf ( "reconciling statefulset %s/%s" , ss . GetNamespace ( ) , ss . GetName ( ) )
2024-06-07 19:56:42 +01:00
if sts . ProxyClassName != "" {
logger . Debugf ( "configuring proxy resources with ProxyClass %s" , sts . ProxyClassName )
ss = applyProxyClassToStatefulSet ( sts . ProxyClass , ss , sts , logger )
2024-02-13 05:27:54 +00:00
}
updateSS := func ( s * appsv1 . StatefulSet ) {
s . Spec = ss . Spec
s . ObjectMeta . Labels = ss . Labels
s . ObjectMeta . Annotations = ss . Annotations
}
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , ss , updateSS )
}
2024-09-08 05:48:38 +01:00
func appInfoForProxy ( cfg * tailscaleSTSConfig ) ( string , error ) {
if cfg . ClusterTargetDNSName != "" || cfg . ClusterTargetIP != "" {
2024-09-08 19:06:07 +01:00
return kubetypes . AppIngressProxy , nil
2024-09-08 05:48:38 +01:00
}
if cfg . TailnetTargetFQDN != "" || cfg . TailnetTargetIP != "" {
2024-09-08 19:06:07 +01:00
return kubetypes . AppEgressProxy , nil
2024-09-08 05:48:38 +01:00
}
if cfg . ServeConfig != nil {
2024-09-08 19:06:07 +01:00
return kubetypes . AppIngressResource , nil
2024-09-08 05:48:38 +01:00
}
if cfg . Connector != nil {
2024-09-08 19:06:07 +01:00
return kubetypes . AppConnector , nil
2024-09-08 05:48:38 +01:00
}
return "" , errors . New ( "unable to determine proxy type" )
}
2024-02-13 05:27:54 +00:00
// mergeStatefulSetLabelsOrAnnots returns a new map that contains all
// keys/values present in the 'custom' map as well as those keys/values from
// the 'current' map whose keys are listed in 'managed'. For a key present in
// both, the value from 'current' wins.
//
// The reason why this merge is necessary is to ensure that labels/annotations
// applied from a ProxyClass get removed if they are removed from a ProxyClass
// or if the ProxyClass no longer applies to this StatefulSet, whilst any
// tailscale managed labels/annotations remain present.
//
// Neither input map is modified and the result never aliases them, so callers
// may pass maps owned by other objects (e.g. a ProxyClass spec). The result is
// always non-nil.
func mergeStatefulSetLabelsOrAnnots(current, custom map[string]string, managed []string) map[string]string {
	merged := make(map[string]string, len(custom)+len(managed))
	for key, val := range custom {
		merged[key] = val
	}
	// Look managed keys up directly instead of scanning 'managed' for every
	// key in 'current'. Reading from a nil 'current' is safe.
	for _, key := range managed {
		if val, ok := current[key]; ok {
			merged[key] = val
		}
	}
	return merged
}
2024-04-26 08:25:06 +01:00
func applyProxyClassToStatefulSet ( pc * tsapi . ProxyClass , ss * appsv1 . StatefulSet , stsCfg * tailscaleSTSConfig , logger * zap . SugaredLogger ) * appsv1 . StatefulSet {
if pc == nil || ss == nil {
return ss
}
2024-10-07 14:58:45 +01:00
if stsCfg != nil && pc . Spec . Metrics != nil && pc . Spec . Metrics . Enable {
2024-04-26 08:25:06 +01:00
if stsCfg . TailnetTargetFQDN == "" && stsCfg . TailnetTargetIP == "" && ! stsCfg . ForwardClusterTrafficViaL7IngressProxy {
enableMetrics ( ss , pc )
} else if stsCfg . ForwardClusterTrafficViaL7IngressProxy {
// TODO (irbekrm): fix this
// For Ingress proxies that have been configured with
// tailscale.com/experimental-forward-cluster-traffic-via-ingress
// annotation, all cluster traffic is forwarded to the
// Ingress backend(s).
logger . Info ( "ProxyClass specifies that metrics should be enabled, but this is currently not supported for Ingress proxies that accept cluster traffic." )
} else {
// TODO (irbekrm): fix this
// For egress proxies, currently all cluster traffic is forwarded to the tailnet target.
logger . Info ( "ProxyClass specifies that metrics should be enabled, but this is currently not supported for Ingress proxies that accept cluster traffic." )
}
}
if pc . Spec . StatefulSet == nil {
2024-02-13 05:27:54 +00:00
return ss
}
// Update StatefulSet metadata.
if wantsSSLabels := pc . Spec . StatefulSet . Labels ; len ( wantsSSLabels ) > 0 {
ss . ObjectMeta . Labels = mergeStatefulSetLabelsOrAnnots ( ss . ObjectMeta . Labels , wantsSSLabels , tailscaleManagedLabels )
}
if wantsSSAnnots := pc . Spec . StatefulSet . Annotations ; len ( wantsSSAnnots ) > 0 {
ss . ObjectMeta . Annotations = mergeStatefulSetLabelsOrAnnots ( ss . ObjectMeta . Annotations , wantsSSAnnots , tailscaleManagedAnnotations )
}
// Update Pod fields.
if pc . Spec . StatefulSet . Pod == nil {
return ss
}
wantsPod := pc . Spec . StatefulSet . Pod
if wantsPodLabels := wantsPod . Labels ; len ( wantsPodLabels ) > 0 {
ss . Spec . Template . ObjectMeta . Labels = mergeStatefulSetLabelsOrAnnots ( ss . Spec . Template . ObjectMeta . Labels , wantsPodLabels , tailscaleManagedLabels )
}
if wantsPodAnnots := wantsPod . Annotations ; len ( wantsPodAnnots ) > 0 {
ss . Spec . Template . ObjectMeta . Annotations = mergeStatefulSetLabelsOrAnnots ( ss . Spec . Template . ObjectMeta . Annotations , wantsPodAnnots , tailscaleManagedAnnotations )
}
ss . Spec . Template . Spec . SecurityContext = wantsPod . SecurityContext
ss . Spec . Template . Spec . ImagePullSecrets = wantsPod . ImagePullSecrets
ss . Spec . Template . Spec . NodeName = wantsPod . NodeName
ss . Spec . Template . Spec . NodeSelector = wantsPod . NodeSelector
2024-04-24 17:31:35 +01:00
ss . Spec . Template . Spec . Affinity = wantsPod . Affinity
2024-02-13 05:27:54 +00:00
ss . Spec . Template . Spec . Tolerations = wantsPod . Tolerations
2024-10-30 10:45:31 +00:00
ss . Spec . Template . Spec . TopologySpreadConstraints = wantsPod . TopologySpreadConstraints
2024-02-13 05:27:54 +00:00
// Update containers.
updateContainer := func ( overlay * tsapi . Container , base corev1 . Container ) corev1 . Container {
if overlay == nil {
return base
}
if overlay . SecurityContext != nil {
base . SecurityContext = overlay . SecurityContext
}
base . Resources = overlay . Resources
2024-04-15 17:24:59 +01:00
for _ , e := range overlay . Env {
// Env vars configured via ProxyClass might override env
// vars that have been specified by the operator, i.e
// TS_USERSPACE. The intended behaviour is to allow this
// and in practice it works without explicitly removing
// the operator configured value here as a later value
// in the env var list overrides an earlier one.
base . Env = append ( base . Env , corev1 . EnvVar { Name : string ( e . Name ) , Value : e . Value } )
}
2024-06-07 16:18:44 +01:00
if overlay . Image != "" {
base . Image = overlay . Image
}
if overlay . ImagePullPolicy != "" {
base . ImagePullPolicy = overlay . ImagePullPolicy
}
2024-02-13 05:27:54 +00:00
return base
}
for i , c := range ss . Spec . Template . Spec . Containers {
if c . Name == "tailscale" {
ss . Spec . Template . Spec . Containers [ i ] = updateContainer ( wantsPod . TailscaleContainer , ss . Spec . Template . Spec . Containers [ i ] )
break
}
}
if initContainers := ss . Spec . Template . Spec . InitContainers ; len ( initContainers ) > 0 {
for i , c := range initContainers {
if c . Name == "sysctler" {
ss . Spec . Template . Spec . InitContainers [ i ] = updateContainer ( wantsPod . TailscaleInitContainer , initContainers [ i ] )
break
}
}
}
return ss
2024-01-09 14:13:22 +00:00
}
2023-08-24 20:16:58 +01:00
2024-04-26 08:25:06 +01:00
// enableMetrics configures the first container named "tailscale" in ss to
// expose tailscaled debug metrics: it appends the TS_TAILSCALED_EXTRA_ARGS env
// var and a "metrics" container port (9001). StatefulSets without such a
// container are left unchanged. pc is not read.
func enableMetrics(ss *appsv1.StatefulSet, pc *tsapi.ProxyClass) {
	containers := ss.Spec.Template.Spec.Containers
	for i := range containers {
		c := &containers[i]
		if c.Name != "tailscale" {
			continue
		}
		// Serve metrics on <pod-ip>:9001/debug/metrics. If we didn't
		// specify Pod IP here, the proxy would, in some cases, also
		// listen to its Tailscale IP - we don't want folks to start
		// relying on this side-effect as a feature.
		c.Env = append(c.Env, corev1.EnvVar{Name: "TS_TAILSCALED_EXTRA_ARGS", Value: "--debug=$(POD_IP):9001"})
		c.Ports = append(c.Ports, corev1.ContainerPort{Name: "metrics", Protocol: "TCP", HostPort: 9001, ContainerPort: 9001})
		return
	}
}
2024-05-10 16:32:37 +01:00
// readAuthKey decodes the tailscaled config stored as JSON under the given key
// of secret and returns its AuthKey field, which may be nil. An error is
// returned if the stored value cannot be unmarshalled.
func readAuthKey(secret *corev1.Secret, key string) (*string, error) {
	var prevConf ipn.ConfigVAlpha
	if err := json.Unmarshal(secret.Data[key], &prevConf); err != nil {
		return nil, fmt.Errorf("error unmarshaling previous tailscaled config in %q: %w", key, err)
	}
	return prevConf.AuthKey, nil
}
2024-01-09 14:13:22 +00:00
// tailscaledConfig takes a proxy config, a newly generated auth key if
// generated and a Secret with the previous proxy state and auth key and
2024-05-10 16:32:37 +01:00
// returns tailscaled configuration and a hash of that configuration.
//
// As of 2024-05-09 it also returns legacy tailscaled config without the
// later added NoStatefulFilter field to support proxies older than cap95.
// TODO (irbekrm): remove the legacy config once we no longer need to support
// versions older than cap94,
// https://tailscale.com/kb/1236/kubernetes-operator#operator-and-proxies
2024-10-07 14:58:45 +01:00
func tailscaledConfig ( stsC * tailscaleSTSConfig , newAuthkey string , oldSecret * corev1 . Secret ) ( tailscaledConfigs , error ) {
2024-05-10 16:32:37 +01:00
conf := & ipn . ConfigVAlpha {
Version : "alpha0" ,
AcceptDNS : "false" ,
AcceptRoutes : "false" , // AcceptRoutes defaults to true
Locked : "false" ,
Hostname : & stsC . Hostname ,
NoStatefulFiltering : "false" ,
}
// For egress proxies only, we need to ensure that stateful filtering is
// not in place so that traffic from cluster can be forwarded via
// Tailscale IPs.
if stsC . TailnetTargetFQDN != "" || stsC . TailnetTargetIP != "" {
conf . NoStatefulFiltering = "true"
2024-01-09 14:13:22 +00:00
}
if stsC . Connector != nil {
routes , err := netutil . CalcAdvertiseRoutes ( stsC . Connector . routes , stsC . Connector . isExitNode )
if err != nil {
2024-05-10 16:32:37 +01:00
return nil , fmt . Errorf ( "error calculating routes: %w" , err )
2024-01-09 14:13:22 +00:00
}
conf . AdvertiseRoutes = routes
}
2024-06-07 19:56:42 +01:00
if shouldAcceptRoutes ( stsC . ProxyClass ) {
conf . AcceptRoutes = "true"
}
2024-01-09 14:13:22 +00:00
if newAuthkey != "" {
conf . AuthKey = & newAuthkey
2024-09-27 17:47:27 +01:00
} else if shouldRetainAuthKey ( oldSecret ) {
key , err := authKeyFromSecret ( oldSecret )
if err != nil {
return nil , fmt . Errorf ( "error retrieving auth key from Secret: %w" , err )
2024-01-09 14:13:22 +00:00
}
2024-09-27 17:47:27 +01:00
conf . AuthKey = key
2023-08-23 16:35:12 +01:00
}
2024-05-10 16:32:37 +01:00
capVerConfigs := make ( map [ tailcfg . CapabilityVersion ] ipn . ConfigVAlpha )
capVerConfigs [ 95 ] = * conf
// legacy config should not contain NoStatefulFiltering field.
conf . NoStatefulFiltering . Clear ( )
capVerConfigs [ 94 ] = * conf
return capVerConfigs , nil
2023-08-23 16:35:12 +01:00
}
2024-09-27 17:47:27 +01:00
// authKeyFromSecret returns the auth key stored in the tailscaled config with
// the highest capability version found in s. Keys whose value is empty or
// whose name tsoperator.CapVerFromFileName cannot parse are ignored. If no
// config is found, a nil key and nil error are returned.
func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
	newestVer := tailcfg.CapabilityVersion(-1)
	newestName := ""
	// Write to StringData, read from Data as StringData is write-only.
	for name, contents := range s.Data {
		if len(contents) == 0 {
			continue
		}
		ver, parseErr := tsoperator.CapVerFromFileName(name)
		if parseErr != nil || ver <= newestVer {
			continue
		}
		newestName, newestVer = name, ver
	}
	if newestName == "" {
		return nil, nil
	}
	// Allow for configs that don't contain an auth key. Perhaps users have
	// some mechanisms to delete them. Auth key is normally not needed after
	// the initial login.
	return readAuthKey(s, newestName)
}
// shouldRetainAuthKey reports whether the state stored in a proxy's state
// Secret suggests that the auth key should be retained, i.e. the Secret exists
// but holds no "device_id" yet (the proxy has not successfully authenticated).
// A nil Secret means there is nothing to retain.
func shouldRetainAuthKey(s *corev1.Secret) bool {
	return s != nil && len(s.Data["device_id"]) == 0
}
2024-06-07 19:56:42 +01:00
// shouldAcceptRoutes reports whether the given ProxyClass explicitly enables
// AcceptRoutes in its TailscaleConfig section. A nil ProxyClass or a missing
// TailscaleConfig yields false.
func shouldAcceptRoutes(pc *tsapi.ProxyClass) bool {
	if pc == nil {
		return false
	}
	tsCfg := pc.Spec.TailscaleConfig
	if tsCfg == nil {
		return false
	}
	return tsCfg.AcceptRoutes
}
2023-08-23 16:35:12 +01:00
// ptrObject is a type constraint for pointer types that implement
// client.Object.
//
// It is satisfied by a type that is exactly *T and also implements
// client.Object. This lets generic helpers such as createOrUpdate and
// getSingleObject allocate a value with new(T) and use the resulting pointer
// as a client.Object.
type ptrObject [ T any ] interface {
client . Object
* T
}
2024-10-07 14:58:45 +01:00
// tailscaledConfigs maps a capability version to the tailscaled configuration
// generated for it.
type tailscaledConfigs map [ tailcfg . CapabilityVersion ] ipn . ConfigVAlpha
2024-05-10 16:32:37 +01:00
// hashBytes produces a hash for the provided tailscaled config that is the same across
2024-01-09 14:13:22 +00:00
// different invocations of this code. We do not use the
// tailscale.com/deephash.Hash here because that produces a different hash for
// the same value in different tailscale builds. The hash we are producing here
// is used to determine if the container running the Connector Tailscale node
// needs to be restarted. The container does not need restarting when the only
// thing that changed is operator version (the hash is also exposed to users via
// an annotation and might be confusing if it changes without the config having
// changed).
2024-10-07 14:58:45 +01:00
func tailscaledConfigHash ( c tailscaledConfigs ) ( string , error ) {
2024-05-10 16:32:37 +01:00
b , err := json . Marshal ( c )
2024-01-09 14:13:22 +00:00
if err != nil {
2024-05-10 16:32:37 +01:00
return "" , fmt . Errorf ( "error marshalling tailscaled configs: %w" , err )
}
h := sha256 . New ( )
if _ , err = h . Write ( b ) ; err != nil {
2024-01-09 14:13:22 +00:00
return "" , fmt . Errorf ( "error calculating hash: %w" , err )
}
return fmt . Sprintf ( "%x" , h . Sum ( nil ) ) , nil
}
2023-08-23 16:35:12 +01:00
// createOrUpdate adds obj to the k8s cluster, unless the object already exists,
// in which case update is called on the existing object and the result is
// written back with c.Update. If update is nil, the existing object is
// returned unmodified.
//
// obj is looked up by its Name and Namespace if Name is set, otherwise it's
// looked up by its labels in namespace ns.
//
// A lookup error other than NotFound is returned wrapped; errors from Update
// and Create are returned as is.
func createOrUpdate[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, obj O, update func(O)) (O, error) {
	var (
		found  O
		getErr error
	)
	if name := obj.GetName(); name == "" {
		found, getErr = getSingleObject[T, O](ctx, c, ns, obj.GetLabels())
	} else {
		found = new(T)
		found.SetName(name)
		found.SetNamespace(obj.GetNamespace())
		getErr = c.Get(ctx, client.ObjectKeyFromObject(obj), found)
	}

	switch {
	case getErr == nil && found != nil:
		// Object already exists: optionally mutate and persist it.
		if update == nil {
			return found, nil
		}
		update(found)
		if err := c.Update(ctx, found); err != nil {
			return nil, err
		}
		return found, nil
	case getErr != nil && !apierrors.IsNotFound(getErr):
		return nil, fmt.Errorf("failed to get object: %w", getErr)
	}

	// Not found (either a NotFound error or no label match): create it.
	if err := c.Create(ctx, obj); err != nil {
		return nil, err
	}
	return obj, nil
}
// getSingleObject searches namespace ns for k8s objects of type T
// (e.g. corev1.Service) that carry the given labels, and returns the match.
// It returns nil (and no error) if no objects match the labels, and an error
// if more than one object matches, if the scheme does not resolve T to exactly
// one GroupVersionKind, or if listing or conversion fails.
func getSingleObject[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, labels map[string]string) (O, error) {
	ret := O(new(T))
	kinds, _, err := c.Scheme().ObjectKinds(ret)
	if err != nil {
		return nil, err
	}
	if len(kinds) != 1 {
		// TODO: the runtime package apparently has a "pick the best
		// GVK" function somewhere that might be good enough?
		return nil, fmt.Errorf("more than 1 GroupVersionKind for %T", ret)
	}

	// List via the corresponding "<Kind>List" kind using unstructured
	// objects, then convert the single hit back into the typed object.
	listGVK := kinds[0]
	listGVK.Kind += "List"
	var matches unstructured.UnstructuredList
	matches.SetGroupVersionKind(listGVK)
	if err := c.List(ctx, &matches, client.InNamespace(ns), client.MatchingLabels(labels)); err != nil {
		return nil, err
	}

	switch n := len(matches.Items); {
	case n == 0:
		return nil, nil
	case n > 1:
		return nil, fmt.Errorf("found multiple matching %T objects", ret)
	}
	if err := c.Scheme().Convert(&matches.Items[0], ret, nil); err != nil {
		return nil, err
	}
	return ret, nil
}
// defaultBool returns the boolean value of the environment variable envName,
// or defVal if the variable is unset or empty. The value is interpreted via
// opt.Bool; its "ok" flag is discarded, so a non-empty value that opt.Bool
// does not recognise yields whatever Get reports rather than defVal.
func defaultBool(envName string, defVal bool) bool {
	raw := os.Getenv(envName)
	if raw != "" {
		parsed, _ := opt.Bool(raw).Get()
		return parsed
	}
	return defVal
}
// defaultEnv returns the value of the environment variable envName, or defVal
// if the variable is unset or set to the empty string.
func defaultEnv(envName, defVal string) string {
	if fromEnv := os.Getenv(envName); fromEnv != "" {
		return fromEnv
	}
	return defVal
}
2024-06-18 19:01:40 +01:00
func nameForService ( svc * corev1 . Service ) string {
2023-08-23 16:35:12 +01:00
if h , ok := svc . Annotations [ AnnotationHostname ] ; ok {
2024-06-18 19:01:40 +01:00
return h
2023-08-23 16:35:12 +01:00
}
2024-06-18 19:01:40 +01:00
return svc . Namespace + "-" + svc . Name
2023-08-23 16:35:12 +01:00
}
2023-10-17 18:05:02 +01:00
// isValidFirewallMode reports whether m is one of the accepted firewall modes:
// "auto", "nftables" or "iptables". The comparison is exact (case-sensitive,
// no trimming).
func isValidFirewallMode(m string) bool {
	switch m {
	case "auto", "nftables", "iptables":
		return true
	}
	return false
}