2023-08-23 16:35:12 +01:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2023-08-24 23:02:42 +01:00
//go:build !plan9
2023-08-23 16:35:12 +01:00
package main
import (
"context"
2024-01-09 14:13:22 +00:00
"crypto/sha256"
2023-08-23 16:35:12 +01:00
_ "embed"
2023-08-24 20:18:17 +01:00
"encoding/json"
2023-10-23 16:22:55 +01:00
"errors"
2023-08-23 16:35:12 +01:00
"fmt"
2023-10-23 16:22:55 +01:00
"net/http"
2023-08-23 16:35:12 +01:00
"os"
"strings"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2023-11-21 10:20:37 +00:00
"k8s.io/apiserver/pkg/storage/names"
2023-08-23 16:35:12 +01:00
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"
"tailscale.com/client/tailscale"
2023-08-24 20:18:17 +01:00
"tailscale.com/ipn"
2024-01-09 14:13:22 +00:00
"tailscale.com/net/netutil"
2023-08-23 16:35:12 +01:00
"tailscale.com/tailcfg"
2023-08-30 18:37:51 +01:00
"tailscale.com/tsnet"
2023-08-23 16:35:12 +01:00
"tailscale.com/types/opt"
"tailscale.com/util/dnsname"
2023-08-24 20:18:17 +01:00
"tailscale.com/util/mak"
2023-08-23 16:35:12 +01:00
)
// Label keys and the finalizer name, all in the tailscale.com namespace.
const (
	LabelManaged         = "tailscale.com/managed"
	LabelParentType      = "tailscale.com/parent-resource-type"
	LabelParentName      = "tailscale.com/parent-resource"
	LabelParentNamespace = "tailscale.com/parent-resource-ns"

	FinalizerName = "tailscale.com/finalizer"
)

// Annotations that users may set.
const (
	// On services.
	AnnotationExpose   = "tailscale.com/expose"
	AnnotationTags     = "tailscale.com/tags"
	AnnotationHostname = "tailscale.com/hostname"
	// NOTE(review): presumably the legacy spelling of
	// AnnotationTailnetTargetIP - confirm against its users.
	annotationTailnetTargetIPOld = "tailscale.com/ts-tailnet-target-ip"
	AnnotationTailnetTargetIP    = "tailscale.com/tailnet-ip"
	// AnnotationTailnetTargetFQDN carries the MagicDNS name of a tailnet node.
	AnnotationTailnetTargetFQDN = "tailscale.com/tailnet-fqdn"

	// On ingresses.
	AnnotationFunnel = "tailscale.com/funnel"
)

// Values written by the operator.
const (
	// Pod annotations recording the last value the operator applied. A
	// change in the hostname, IP, FQDN or tailscaled config changes the
	// annotation and thereby triggers a pod restart.
	podAnnotationLastSetClusterIP         = "tailscale.com/operator-last-set-cluster-ip"
	podAnnotationLastSetHostname          = "tailscale.com/operator-last-set-hostname"
	podAnnotationLastSetTailnetTargetIP   = "tailscale.com/operator-last-set-ts-tailnet-target-ip"
	podAnnotationLastSetTailnetTargetFQDN = "tailscale.com/operator-last-set-ts-tailnet-target-fqdn"
	// podAnnotationLastSetConfigFileHash holds the sha256 hash of the
	// current tailscaled configuration contents.
	podAnnotationLastSetConfigFileHash = "tailscale.com/operator-last-set-config-file-hash"

	// tailscaledConfigKey is the key in the proxy Secret's data under which
	// the tailscaled config contents are stored.
	tailscaledConfigKey = "tailscaled"
)
type tailscaleSTSConfig struct {
ParentResourceName string
ParentResourceUID string
ChildResourceLabels map [ string ] string
2024-01-09 14:13:22 +00:00
ServeConfig * ipn . ServeConfig
ClusterTargetIP string // ingress target
2023-08-30 08:31:37 +01:00
2024-01-09 14:13:22 +00:00
TailnetTargetIP string // egress target IP
2023-08-23 16:35:12 +01:00
2024-01-09 14:13:22 +00:00
TailnetTargetFQDN string // egress target FQDN
2023-11-24 16:24:48 +00:00
2023-08-23 16:35:12 +01:00
Hostname string
Tags [ ] string // if empty, use defaultTags
2023-12-14 13:51:59 +00:00
2024-01-09 14:13:22 +00:00
// Connector specifies a configuration of a Connector instance if that's
// what this StatefulSet should be created for.
Connector * connector
}
// connector holds the Connector-specific part of a proxy configuration.
type connector struct {
	// routes is a list of subnet routes that this Connector should expose.
	// It is passed as a single string to netutil.CalcAdvertiseRoutes.
	routes string
	// isExitNode defines whether this Connector should act as an exit node.
	isExitNode bool
}
type tailscaleSTSReconciler struct {
client . Client
2023-08-30 18:37:51 +01:00
tsnetServer * tsnet . Server
2023-08-23 16:35:12 +01:00
tsClient tsClient
defaultTags [ ] string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
2023-10-17 18:05:02 +01:00
tsFirewallMode string
}
func ( sts tailscaleSTSReconciler ) validate ( ) error {
if sts . tsFirewallMode != "" && ! isValidFirewallMode ( sts . tsFirewallMode ) {
return fmt . Errorf ( "invalid proxy firewall mode %s, valid modes are iptables, nftables or unset" , sts . tsFirewallMode )
}
return nil
2023-08-23 16:35:12 +01:00
}
2023-08-30 18:37:51 +01:00
// IsHTTPSEnabledOnTailnet reports whether HTTPS is enabled on the tailnet,
// which is taken to be the case when the tsnet server has at least one cert
// domain.
func (a *tailscaleSTSReconciler) IsHTTPSEnabledOnTailnet() bool {
	certDomains := a.tsnetServer.CertDomains()
	return len(certDomains) != 0
}
2023-08-23 16:35:12 +01:00
// Provision ensures that the StatefulSet for the given service is running and
// up to date.
2023-08-30 08:31:37 +01:00
func ( a * tailscaleSTSReconciler ) Provision ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) ( * corev1 . Service , error ) {
2023-08-23 16:35:12 +01:00
// Do full reconcile.
2024-01-09 14:13:22 +00:00
// TODO (don't create Service for the Connector)
2023-08-23 16:35:12 +01:00
hsvc , err := a . reconcileHeadlessService ( ctx , logger , sts )
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to reconcile headless service: %w" , err )
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
secretName , tsConfigHash , err := a . createOrGetSecret ( ctx , logger , sts , hsvc )
2023-08-23 16:35:12 +01:00
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to create or get API key secret: %w" , err )
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
_ , err = a . reconcileSTS ( ctx , logger , sts , hsvc , secretName , tsConfigHash )
2023-08-23 16:35:12 +01:00
if err != nil {
2023-08-30 08:31:37 +01:00
return nil , fmt . Errorf ( "failed to reconcile statefulset: %w" , err )
2023-08-23 16:35:12 +01:00
}
2023-08-30 08:31:37 +01:00
return hsvc , nil
2023-08-23 16:35:12 +01:00
}
// Cleanup removes all resources associated that were created by Provision with
// the given labels. It returns true when all resources have been removed,
// otherwise it returns false and the caller should retry later.
func ( a * tailscaleSTSReconciler ) Cleanup ( ctx context . Context , logger * zap . SugaredLogger , labels map [ string ] string ) ( done bool , _ error ) {
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , labels )
if err != nil {
return false , fmt . Errorf ( "getting statefulset: %w" , err )
}
if sts != nil {
if ! sts . GetDeletionTimestamp ( ) . IsZero ( ) {
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
logger . Debugf ( "waiting for statefulset %s/%s deletion" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
err := a . DeleteAllOf ( ctx , & appsv1 . StatefulSet { } , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) , client . PropagationPolicy ( metav1 . DeletePropagationForeground ) )
if err != nil {
return false , fmt . Errorf ( "deleting statefulset: %w" , err )
}
logger . Debugf ( "started deletion of statefulset %s/%s" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
2023-08-09 00:03:08 +01:00
id , _ , _ , err := a . DeviceInfo ( ctx , labels )
2023-08-23 16:35:12 +01:00
if err != nil {
return false , fmt . Errorf ( "getting device info: %w" , err )
}
if id != "" {
2023-10-23 16:22:55 +01:00
logger . Debugf ( "deleting device %s from control" , string ( id ) )
2023-08-23 16:35:12 +01:00
if err := a . tsClient . DeleteDevice ( ctx , string ( id ) ) ; err != nil {
2023-10-23 16:22:55 +01:00
errResp := & tailscale . ErrResponse { }
if ok := errors . As ( err , errResp ) ; ok && errResp . Status == http . StatusNotFound {
logger . Debugf ( "device %s not found, likely because it has already been deleted from control" , string ( id ) )
} else {
return false , fmt . Errorf ( "deleting device: %w" , err )
}
} else {
logger . Debugf ( "device %s deleted from control" , string ( id ) )
2023-08-23 16:35:12 +01:00
}
}
types := [ ] client . Object {
& corev1 . Service { } ,
& corev1 . Secret { } ,
}
for _ , typ := range types {
if err := a . DeleteAllOf ( ctx , typ , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return false , err
}
}
return true , nil
}
2023-11-21 10:20:37 +00:00
// maxStatefulSetNameLength is the longest StatefulSet name that still yields
// a valid controller-revision-hash label value
// (see https://github.com/kubernetes/kubernetes/issues/64023).
//
// That label value is built as <StatefulSet name> + "-" + <revision hash>.
// Label values may be at most 63 chars long and the revision hash takes 10
// chars, so the name may use what remains after the hash and the hyphen.
// https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
// https://github.com/kubernetes/kubernetes/blob/v1.28.4/pkg/controller/history/controller_history.go#L90-L104
const maxStatefulSetNameLength = 63 - (10 + 1)
// statefulSetNameBase accepts name of parent resource and returns a string in
// form ts-<portion-of-parentname>- that, when passed to Kubernetes name
// generation will NOT result in a StatefulSet name longer than 52 chars.
// This is done because of https://github.com/kubernetes/kubernetes/issues/64023.
func statefulSetNameBase ( parent string ) string {
base := fmt . Sprintf ( "ts-%s-" , parent )
generator := names . SimpleNameGenerator
2024-01-11 20:02:03 +00:00
for {
generatedName := generator . GenerateName ( base )
excess := len ( generatedName ) - maxStatefulSetNameLength
if excess <= 0 {
return base
}
2024-01-12 10:08:22 +00:00
base = base [ : len ( base ) - 1 - excess ] // cut off the excess chars
base = base + "-" // re-instate the dash
2023-11-21 10:20:37 +00:00
}
}
2023-08-23 16:35:12 +01:00
func ( a * tailscaleSTSReconciler ) reconcileHeadlessService ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) ( * corev1 . Service , error ) {
2023-11-21 10:20:37 +00:00
nameBase := statefulSetNameBase ( sts . ParentResourceName )
2023-08-23 16:35:12 +01:00
hsvc := & corev1 . Service {
ObjectMeta : metav1 . ObjectMeta {
2023-11-21 10:20:37 +00:00
GenerateName : nameBase ,
2023-08-23 16:35:12 +01:00
Namespace : a . operatorNamespace ,
Labels : sts . ChildResourceLabels ,
} ,
Spec : corev1 . ServiceSpec {
ClusterIP : "None" ,
Selector : map [ string ] string {
"app" : sts . ParentResourceUID ,
} ,
} ,
}
logger . Debugf ( "reconciling headless service for StatefulSet" )
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , hsvc , func ( svc * corev1 . Service ) { svc . Spec = hsvc . Spec } )
}
2024-01-09 14:13:22 +00:00
func ( a * tailscaleSTSReconciler ) createOrGetSecret ( ctx context . Context , logger * zap . SugaredLogger , stsC * tailscaleSTSConfig , hsvc * corev1 . Service ) ( string , string , error ) {
2023-08-23 16:35:12 +01:00
secret := & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
// Hardcode a -0 suffix so that in future, if we support
// multiple StatefulSet replicas, we can provision -N for
// those.
Name : hsvc . Name + "-0" ,
Namespace : a . operatorNamespace ,
Labels : stsC . ChildResourceLabels ,
} ,
}
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
var orig * corev1 . Secret // unmodified copy of secret
2023-08-23 16:35:12 +01:00
if err := a . Get ( ctx , client . ObjectKeyFromObject ( secret ) , secret ) ; err == nil {
logger . Debugf ( "secret %s/%s already exists" , secret . GetNamespace ( ) , secret . GetName ( ) )
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
orig = secret . DeepCopy ( )
2023-08-23 16:35:12 +01:00
} else if ! apierrors . IsNotFound ( err ) {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
var (
authKey , hash string
)
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if orig == nil {
2023-08-24 20:18:17 +01:00
// Secret doesn't exist yet, create one. Initially it contains
// only the Tailscale authkey, but once Tailscale starts it'll
// also store the daemon state.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , stsC . ChildResourceLabels )
if err != nil {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-24 20:18:17 +01:00
}
if sts != nil {
// StatefulSet exists, so we have already created the secret.
// If the secret is missing, they should delete the StatefulSet.
logger . Errorf ( "Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet." , sts . GetNamespace ( ) , sts . GetName ( ) )
2024-01-09 14:13:22 +00:00
return "" , "" , nil
2023-08-24 20:18:17 +01:00
}
// Create API Key secret which is going to be used by the statefulset
// to authenticate with Tailscale.
logger . Debugf ( "creating authkey for new tailscale proxy" )
tags := stsC . Tags
if len ( tags ) == 0 {
tags = a . defaultTags
}
2024-01-09 14:13:22 +00:00
authKey , err = a . newAuthKey ( ctx , tags )
2023-08-24 20:18:17 +01:00
if err != nil {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-24 20:18:17 +01:00
}
2024-01-09 14:13:22 +00:00
}
if ! shouldDoTailscaledDeclarativeConfig ( stsC ) && authKey != "" {
2023-08-24 20:18:17 +01:00
mak . Set ( & secret . StringData , "authkey" , authKey )
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
if shouldDoTailscaledDeclarativeConfig ( stsC ) {
confFileBytes , h , err := tailscaledConfig ( stsC , authKey , orig )
if err != nil {
return "" , "" , fmt . Errorf ( "error creating tailscaled config: %w" , err )
}
hash = h
mak . Set ( & secret . StringData , tailscaledConfigKey , string ( confFileBytes ) )
}
2023-08-24 20:18:17 +01:00
if stsC . ServeConfig != nil {
j , err := json . Marshal ( stsC . ServeConfig )
if err != nil {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-24 20:18:17 +01:00
}
mak . Set ( & secret . StringData , "serve-config" , string ( j ) )
}
2024-01-09 14:13:22 +00:00
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if orig != nil {
2024-01-09 14:13:22 +00:00
logger . Debugf ( "patching existing state Secret with values %s" , secret . Data [ tailscaledConfigKey ] )
cmd/k8s-operator,ipn/store/kubestore: patch secrets instead of updating
We would call Update on the secret, but that was racey and would occasionaly
fail. Instead use patch whenever we can.
Fixes errors like
```
boot: 2023/08/29 01:03:53 failed to set serve config: sending serve config: updating config: writing ServeConfig to StateStore: Operation cannot be fulfilled on secrets "ts-webdav-kfrzv-0": the object has been modified; please apply your changes to the latest version and try again
{"level":"error","ts":"2023-08-29T01:03:48Z","msg":"Reconciler error","controller":"ingress","controllerGroup":"networking.k8s.io","controllerKind":"Ingress","Ingress":{"name":"webdav","namespace":"default"},"namespace":"default","name":"webdav","reconcileID":"96f5cfed-7782-4834-9b75-b0950fd563ed","error":"failed to provision: failed to create or get API key secret: Operation cannot be fulfilled on secrets \"ts-webdav-kfrzv-0\": the object has been modified; please apply your changes to the latest version and try again","stacktrace":"sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:324\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\tsigs.k8s.io/controller-runtime@v0.15.0/pkg/internal/controller/controller.go:226"}
```
Updates #502
Updates #7895
Signed-off-by: Maisem Ali <maisem@tailscale.com>
2023-08-29 20:43:22 +01:00
if err := a . Patch ( ctx , secret , client . MergeFrom ( orig ) ) ; err != nil {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-24 20:18:17 +01:00
}
} else {
2024-01-09 14:13:22 +00:00
logger . Debugf ( "creating new state Secret with authkey %s" , secret . Data [ tailscaledConfigKey ] )
2023-08-24 20:18:17 +01:00
if err := a . Create ( ctx , secret ) ; err != nil {
2024-01-09 14:13:22 +00:00
return "" , "" , err
2023-08-24 20:18:17 +01:00
}
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
return secret . Name , hash , nil
2023-08-23 16:35:12 +01:00
}
// DeviceInfo returns the device ID and hostname for the Tailscale device
// associated with the given labels.
2023-08-09 00:03:08 +01:00
func ( a * tailscaleSTSReconciler ) DeviceInfo ( ctx context . Context , childLabels map [ string ] string ) ( id tailcfg . StableNodeID , hostname string , ips [ ] string , err error ) {
2023-08-23 16:35:12 +01:00
sec , err := getSingleObject [ corev1 . Secret ] ( ctx , a . Client , a . operatorNamespace , childLabels )
if err != nil {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , err
2023-08-23 16:35:12 +01:00
}
if sec == nil {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
id = tailcfg . StableNodeID ( sec . Data [ "device_id" ] )
if id == "" {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
// to remove it.
hostname = strings . TrimSuffix ( string ( sec . Data [ "device_fqdn" ] ) , "." )
if hostname == "" {
2023-08-09 00:03:08 +01:00
return "" , "" , nil , nil
2023-08-23 16:35:12 +01:00
}
2023-08-09 00:03:08 +01:00
if rawDeviceIPs , ok := sec . Data [ "device_ips" ] ; ok {
if err := json . Unmarshal ( rawDeviceIPs , & ips ) ; err != nil {
return "" , "" , nil , err
}
}
return id , hostname , ips , nil
2023-08-23 16:35:12 +01:00
}
// newAuthKey asks the Tailscale API for a new auth key that is single-use
// (not reusable), preauthorized and carries the given tags, and returns the
// key.
func (a *tailscaleSTSReconciler) newAuthKey(ctx context.Context, tags []string) (string, error) {
	var caps tailscale.KeyCapabilities
	caps.Devices.Create = tailscale.KeyDeviceCreateCapabilities{
		Reusable:      false,
		Preauthorized: true,
		Tags:          tags,
	}

	authKey, _, err := a.tsClient.CreateKey(ctx, caps)
	if err != nil {
		return "", err
	}
	return authKey, nil
}
2023-10-30 18:18:09 +00:00
//go:embed deploy/manifests/proxy.yaml
2023-08-23 16:35:12 +01:00
var proxyYaml [ ] byte
2023-10-30 18:18:09 +00:00
//go:embed deploy/manifests/userspace-proxy.yaml
2023-08-24 20:18:17 +01:00
var userspaceProxyYaml [ ] byte
2024-01-09 14:13:22 +00:00
func ( a * tailscaleSTSReconciler ) reconcileSTS ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig , headlessSvc * corev1 . Service , proxySecret , tsConfigHash string ) ( * appsv1 . StatefulSet , error ) {
2023-08-23 16:35:12 +01:00
var ss appsv1 . StatefulSet
2023-08-24 20:18:17 +01:00
if sts . ServeConfig != nil {
if err := yaml . Unmarshal ( userspaceProxyYaml , & ss ) ; err != nil {
return nil , fmt . Errorf ( "failed to unmarshal proxy spec: %w" , err )
}
} else {
if err := yaml . Unmarshal ( proxyYaml , & ss ) ; err != nil {
return nil , fmt . Errorf ( "failed to unmarshal proxy spec: %w" , err )
}
2023-10-17 01:25:00 +01:00
for i := range ss . Spec . Template . Spec . InitContainers {
c := & ss . Spec . Template . Spec . InitContainers [ i ]
if c . Name == "sysctler" {
c . Image = a . proxyImage
break
}
}
2023-08-23 16:35:12 +01:00
}
container := & ss . Spec . Template . Spec . Containers [ 0 ]
container . Image = a . proxyImage
2024-01-09 14:13:22 +00:00
ss . ObjectMeta = metav1 . ObjectMeta {
Name : headlessSvc . Name ,
Namespace : a . operatorNamespace ,
Labels : sts . ChildResourceLabels ,
}
ss . Spec . ServiceName = headlessSvc . Name
ss . Spec . Selector = & metav1 . LabelSelector {
MatchLabels : map [ string ] string {
"app" : sts . ParentResourceUID ,
} ,
}
mak . Set ( & ss . Spec . Template . Labels , "app" , sts . ParentResourceUID )
// Generic containerboot configuration options.
2023-08-23 16:35:12 +01:00
container . Env = append ( container . Env ,
corev1 . EnvVar {
Name : "TS_KUBE_SECRET" ,
2024-01-09 14:13:22 +00:00
Value : proxySecret ,
2023-08-23 16:35:12 +01:00
} ,
2024-01-09 14:13:22 +00:00
)
if ! shouldDoTailscaledDeclarativeConfig ( sts ) {
container . Env = append ( container . Env , corev1 . EnvVar {
2023-08-23 16:35:12 +01:00
Name : "TS_HOSTNAME" ,
Value : sts . Hostname ,
} )
2024-01-09 14:13:22 +00:00
// containerboot currently doesn't have a way to re-read the hostname/ip as
// it is passed via an environment variable. So we need to restart the
// container when the value changes. We do this by adding an annotation to
// the pod template that contains the last value we set.
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetHostname , sts . Hostname )
}
// Configure containeboot to run tailscaled with a configfile read from the state Secret.
if shouldDoTailscaledDeclarativeConfig ( sts ) {
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetConfigFileHash , tsConfigHash )
ss . Spec . Template . Spec . Volumes = append ( ss . Spec . Template . Spec . Volumes , corev1 . Volume {
Name : "tailscaledconfig" ,
VolumeSource : corev1 . VolumeSource {
Secret : & corev1 . SecretVolumeSource {
SecretName : proxySecret ,
Items : [ ] corev1 . KeyToPath { {
Key : tailscaledConfigKey ,
Path : tailscaledConfigKey ,
} } ,
} ,
} ,
} )
container . VolumeMounts = append ( container . VolumeMounts , corev1 . VolumeMount {
Name : "tailscaledconfig" ,
ReadOnly : true ,
MountPath : "/etc/tsconfig" ,
} )
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "EXPERIMENTAL_TS_CONFIGFILE_PATH" ,
Value : "/etc/tsconfig/tailscaled" ,
} )
}
if a . tsFirewallMode != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_DEBUG_FIREWALL_MODE" ,
Value : a . tsFirewallMode ,
} )
}
ss . Spec . Template . Spec . PriorityClassName = a . proxyPriorityClassName
// Ingress/egress proxy configuration options.
2023-08-30 08:31:37 +01:00
if sts . ClusterTargetIP != "" {
2023-08-24 20:18:17 +01:00
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_DEST_IP" ,
2023-08-30 08:31:37 +01:00
Value : sts . ClusterTargetIP ,
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetClusterIP , sts . ClusterTargetIP )
2023-08-30 08:31:37 +01:00
} else if sts . TailnetTargetIP != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_TAILNET_TARGET_IP" ,
Value : sts . TailnetTargetIP ,
2023-08-24 20:18:17 +01:00
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetTailnetTargetIP , sts . TailnetTargetIP )
2023-11-24 16:24:48 +00:00
} else if sts . TailnetTargetFQDN != "" {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_TAILNET_TARGET_FQDN" ,
Value : sts . TailnetTargetFQDN ,
} )
2024-01-09 14:13:22 +00:00
mak . Set ( & ss . Spec . Template . Annotations , podAnnotationLastSetTailnetTargetFQDN , sts . TailnetTargetFQDN )
2023-08-24 20:18:17 +01:00
} else if sts . ServeConfig != nil {
container . Env = append ( container . Env , corev1 . EnvVar {
Name : "TS_SERVE_CONFIG" ,
Value : "/etc/tailscaled/serve-config" ,
} )
container . VolumeMounts = append ( container . VolumeMounts , corev1 . VolumeMount {
Name : "serve-config" ,
ReadOnly : true ,
MountPath : "/etc/tailscaled" ,
} )
ss . Spec . Template . Spec . Volumes = append ( ss . Spec . Template . Spec . Volumes , corev1 . Volume {
Name : "serve-config" ,
VolumeSource : corev1 . VolumeSource {
Secret : & corev1 . SecretVolumeSource {
2024-01-09 14:13:22 +00:00
SecretName : proxySecret ,
2023-08-24 20:18:17 +01:00
Items : [ ] corev1 . KeyToPath { {
Key : "serve-config" ,
Path : "serve-config" ,
} } ,
} ,
} ,
} )
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
logger . Debugf ( "reconciling statefulset %s/%s" , ss . GetNamespace ( ) , ss . GetName ( ) )
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , & ss , func ( s * appsv1 . StatefulSet ) { s . Spec = ss . Spec } )
}
2023-08-24 20:16:58 +01:00
2024-01-09 14:13:22 +00:00
// tailscaledConfig takes a proxy config, a newly generated auth key if
// generated and a Secret with the previous proxy state and auth key and
// produces returns tailscaled configuration and a hash of that configuration.
func tailscaledConfig ( stsC * tailscaleSTSConfig , newAuthkey string , oldSecret * corev1 . Secret ) ( [ ] byte , string , error ) {
conf := ipn . ConfigVAlpha {
Version : "alpha0" ,
AcceptDNS : "false" ,
Locked : "false" ,
Hostname : & stsC . Hostname ,
}
if stsC . Connector != nil {
routes , err := netutil . CalcAdvertiseRoutes ( stsC . Connector . routes , stsC . Connector . isExitNode )
if err != nil {
return nil , "" , fmt . Errorf ( "error calculating routes: %w" , err )
}
conf . AdvertiseRoutes = routes
}
if newAuthkey != "" {
conf . AuthKey = & newAuthkey
} else if oldSecret != nil && len ( oldSecret . Data [ tailscaledConfigKey ] ) > 0 { // write to StringData, read from Data as StringData is write-only
origConf := & ipn . ConfigVAlpha { }
if err := json . Unmarshal ( [ ] byte ( oldSecret . Data [ tailscaledConfigKey ] ) , origConf ) ; err != nil {
return nil , "" , fmt . Errorf ( "error unmarshaling previous tailscaled config: %w" , err )
}
conf . AuthKey = origConf . AuthKey
2023-08-24 20:16:58 +01:00
}
2024-01-09 14:13:22 +00:00
confFileBytes , err := json . Marshal ( conf )
if err != nil {
return nil , "" , fmt . Errorf ( "error marshaling tailscaled config : %w" , err )
2023-11-24 16:24:48 +00:00
}
2024-01-09 14:13:22 +00:00
hash , err := hashBytes ( confFileBytes )
if err != nil {
return nil , "" , fmt . Errorf ( "error calculating config hash: %w" , err )
2023-08-23 16:35:12 +01:00
}
2024-01-09 14:13:22 +00:00
return confFileBytes , hash , nil
2023-08-23 16:35:12 +01:00
}
// ptrObject constrains a type parameter to be the pointer type *T, and
// additionally requires that pointer type to implement client.Object.
type ptrObject[T any] interface {
	*T
	client.Object
}
2024-01-09 14:13:22 +00:00
// hashBytes returns the lowercase hex-encoded SHA-256 digest of b. The result
// is the same across different invocations of this code. We do not use the
// tailscale.com/deephash.Hash here because that produces a different hash for
// the same value in different tailscale builds. The hash we are producing here
// is used to determine if the container running the Connector Tailscale node
// needs to be restarted. The container does not need restarting when the only
// thing that changed is operator version (the hash is also exposed to users via
// an annotation and might be confusing if it changes without the config having
// changed).
//
// The returned error is always nil; it is kept in the signature so that
// existing callers do not need to change.
func hashBytes(b []byte) (string, error) {
	// sha256.Sum256 cannot fail, unlike the New/Write/Sum sequence whose
	// Write error (documented to never occur) had to be checked.
	sum := sha256.Sum256(b)
	return fmt.Sprintf("%x", sum[:]), nil
}
2023-08-23 16:35:12 +01:00
// createOrUpdate ensures that obj exists in the k8s cluster. If a matching
// object is already present, update (when non-nil) is applied to it and the
// result is written back; with a nil update the existing object is returned
// untouched. If no matching object is present, obj is created.
//
// The existing object is located by obj's Name and Namespace when Name is set,
// and by obj's labels within namespace ns otherwise.
//
// It returns the object that now exists in the cluster: the (possibly updated)
// existing object, or obj itself after creation.
func createOrUpdate[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, obj O, update func(O)) (O, error) {
	var (
		found O
		err   error
	)
	if name := obj.GetName(); name == "" {
		found, err = getSingleObject[T, O](ctx, c, ns, obj.GetLabels())
	} else {
		found = new(T)
		found.SetName(name)
		found.SetNamespace(obj.GetNamespace())
		err = c.Get(ctx, client.ObjectKeyFromObject(obj), found)
	}

	switch {
	case err != nil:
		// A "not found" error just means the object has to be created;
		// anything else is a genuine lookup failure.
		if !apierrors.IsNotFound(err) {
			return nil, fmt.Errorf("failed to get object: %w", err)
		}
	case found != nil:
		if update == nil {
			return found, nil
		}
		update(found)
		if updateErr := c.Update(ctx, found); updateErr != nil {
			return nil, updateErr
		}
		return found, nil
	}

	if createErr := c.Create(ctx, obj); createErr != nil {
		return nil, createErr
	}
	return obj, nil
}
// getSingleObject lists k8s objects of type T (e.g. corev1.Service) in
// namespace ns that carry the given labels and returns the one match.
//
// It returns nil (and a nil error) when nothing matches, and an error when
// more than one object matches, when the scheme does not map T to exactly one
// GroupVersionKind, or when listing or conversion fails.
func getSingleObject[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, labels map[string]string) (O, error) {
	result := O(new(T))

	gvks, _, err := c.Scheme().ObjectKinds(result)
	if err != nil {
		return nil, err
	}
	if len(gvks) != 1 {
		// TODO: the runtime package apparently has a "pick the best
		// GVK" function somewhere that might be good enough?
		return nil, fmt.Errorf("more than 1 GroupVersionKind for %T", result)
	}

	// Query through the corresponding "<Kind>List" kind using an
	// unstructured list.
	listGVK := gvks[0]
	listGVK.Kind += "List"
	var matches unstructured.UnstructuredList
	matches.SetGroupVersionKind(listGVK)
	err = c.List(ctx, &matches, client.InNamespace(ns), client.MatchingLabels(labels))
	if err != nil {
		return nil, err
	}

	switch len(matches.Items) {
	case 0:
		return nil, nil
	case 1:
		// Exactly one match: convert it to the typed object below.
	default:
		return nil, fmt.Errorf("found multiple matching %T objects", result)
	}

	if err := c.Scheme().Convert(&matches.Items[0], result, nil); err != nil {
		return nil, err
	}
	return result, nil
}
// defaultBool reads the environment variable envName as a boolean. If the
// variable is unset or empty, defVal is returned.
//
// NOTE(review): the "ok" result of opt.Bool.Get is discarded, so a non-empty
// value that cannot be parsed yields whatever Get returns for it (presumably
// false - confirm against opt.Bool) rather than defVal.
func defaultBool(envName string, defVal bool) bool {
	if raw := os.Getenv(envName); raw != "" {
		parsed, _ := opt.Bool(raw).Get()
		return parsed
	}
	return defVal
}
// defaultEnv returns the value of the environment variable envName, or defVal
// if the variable is unset or set to the empty string.
func defaultEnv(envName, defVal string) string {
	if val := os.Getenv(envName); val != "" {
		return val
	}
	return defVal
}
// nameForService returns the Tailscale hostname to use for svc.
//
// If svc carries the AnnotationHostname annotation, its value is returned
// after being checked with dnsname.ValidLabel; a value that fails the check
// results in an error. Without the annotation, the name is
// "<namespace>-<name>" of the Service and is returned without validation.
func nameForService(svc *corev1.Service) (string, error) {
	hostname, annotated := svc.Annotations[AnnotationHostname]
	if !annotated {
		return svc.Namespace + "-" + svc.Name, nil
	}
	if err := dnsname.ValidLabel(hostname); err != nil {
		return "", fmt.Errorf("invalid Tailscale hostname %q: %w", hostname, err)
	}
	return hostname, nil
}
2023-10-17 18:05:02 +01:00
// isValidFirewallMode reports whether m is one of the accepted firewall mode
// names: "auto", "nftables" or "iptables". The match is exact and
// case-sensitive.
func isValidFirewallMode(m string) bool {
	switch m {
	case "auto", "nftables", "iptables":
		return true
	default:
		return false
	}
}
2024-01-09 14:13:22 +00:00
// shouldDoTailscaledDeclarativeConfig determines whether the proxy instance
// should be configured to run tailscaled only, with all config options passed
// to tailscaled. This is the case exactly when the proxy config has a
// Connector set.
func shouldDoTailscaledDeclarativeConfig(stsC *tailscaleSTSConfig) bool {
	hasConnector := stsC.Connector != nil
	return hasConnector
}