293 lines
10 KiB
Go
293 lines
10 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/netip"
|
|
"slices"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"go.uber.org/zap"
|
|
xslices "golang.org/x/exp/slices"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/client-go/tools/record"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
tsoperator "tailscale.com/k8s-operator"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/tstime"
|
|
"tailscale.com/util/clientmetric"
|
|
"tailscale.com/util/set"
|
|
)
|
|
|
|
const (
|
|
reasonConnectorCreationFailed = "ConnectorCreationFailed"
|
|
|
|
reasonConnectorCreated = "ConnectorCreated"
|
|
reasonConnectorCleanupFailed = "ConnectorCleanupFailed"
|
|
reasonConnectorCleanupInProgress = "ConnectorCleanupInProgress"
|
|
reasonConnectorInvalid = "ConnectorInvalid"
|
|
|
|
messageConnectorCreationFailed = "Failed creating Connector: %v"
|
|
messageConnectorInvalid = "Connector is invalid: %v"
|
|
|
|
shortRequeue = time.Second * 5
|
|
)
|
|
|
|
type ConnectorReconciler struct {
|
|
client.Client
|
|
|
|
recorder record.EventRecorder
|
|
ssr *tailscaleSTSReconciler
|
|
logger *zap.SugaredLogger
|
|
|
|
tsnamespace string
|
|
|
|
clock tstime.Clock
|
|
|
|
mu sync.Mutex // protects following
|
|
|
|
subnetRouters set.Slice[types.UID] // for subnet routers gauge
|
|
exitNodes set.Slice[types.UID] // for exit nodes gauge
|
|
}
|
|
|
|
var (
|
|
// gaugeConnectorResources tracks the overall number of Connectors currently managed by this operator instance.
|
|
gaugeConnectorResources = clientmetric.NewGauge("k8s_connector_resources")
|
|
// gaugeConnectorSubnetRouterResources tracks the number of Connectors managed by this operator instance that are subnet routers.
|
|
gaugeConnectorSubnetRouterResources = clientmetric.NewGauge("k8s_connector_subnetrouter_resources")
|
|
// gaugeConnectorExitNodeResources tracks the number of Connectors currently managed by this operator instance that are exit nodes.
|
|
gaugeConnectorExitNodeResources = clientmetric.NewGauge("k8s_connector_exitnode_resources")
|
|
)
|
|
|
|
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
|
logger := a.logger.With("Connector", req.Name)
|
|
logger.Debugf("starting reconcile")
|
|
defer logger.Debugf("reconcile finished")
|
|
|
|
cn := new(tsapi.Connector)
|
|
err = a.Get(ctx, req.NamespacedName, cn)
|
|
if apierrors.IsNotFound(err) {
|
|
logger.Debugf("Connector not found, assuming it was deleted")
|
|
return reconcile.Result{}, nil
|
|
} else if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
|
|
}
|
|
if !cn.DeletionTimestamp.IsZero() {
|
|
logger.Debugf("Connector is being deleted or should not be exposed, cleaning up resources")
|
|
ix := xslices.Index(cn.Finalizers, FinalizerName)
|
|
if ix < 0 {
|
|
logger.Debugf("no finalizer, nothing to do")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
if done, err := a.maybeCleanupConnector(ctx, logger, cn); err != nil {
|
|
return reconcile.Result{}, err
|
|
} else if !done {
|
|
logger.Debugf("Connector resource cleanup not yet finished, will retry...")
|
|
return reconcile.Result{RequeueAfter: shortRequeue}, nil
|
|
}
|
|
|
|
cn.Finalizers = append(cn.Finalizers[:ix], cn.Finalizers[ix+1:]...)
|
|
if err := a.Update(ctx, cn); err != nil {
|
|
return reconcile.Result{}, err
|
|
}
|
|
logger.Infof("Connector resources cleaned up")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
oldCnStatus := cn.Status.DeepCopy()
|
|
setStatus := func(cn *tsapi.Connector, _ tsapi.ConnectorConditionType, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
|
|
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, status, reason, message, cn.Generation, a.clock, logger)
|
|
if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
|
|
// An error encountered here should get returned by the Reconcile function.
|
|
if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
|
|
err = errors.Wrap(err, updateErr.Error())
|
|
}
|
|
}
|
|
return res, err
|
|
}
|
|
|
|
if !slices.Contains(cn.Finalizers, FinalizerName) {
|
|
// This log line is printed exactly once during initial provisioning,
|
|
// because once the finalizer is in place this block gets skipped. So,
|
|
// this is a nice place to tell the operator that the high level,
|
|
// multi-reconcile operation is underway.
|
|
logger.Infof("ensuring Connector is set up")
|
|
cn.Finalizers = append(cn.Finalizers, FinalizerName)
|
|
if err := a.Update(ctx, cn); err != nil {
|
|
logger.Errorf("error adding finalizer: %w", err)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, reasonConnectorCreationFailed)
|
|
}
|
|
}
|
|
|
|
if err := a.validate(cn); err != nil {
|
|
logger.Errorf("error validating Connector spec: %w", err)
|
|
message := fmt.Sprintf(messageConnectorInvalid, err)
|
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
|
|
}
|
|
|
|
if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
|
|
logger.Errorf("error creating Connector resources: %w", err)
|
|
message := fmt.Sprintf(messageConnectorCreationFailed, err)
|
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
|
|
}
|
|
|
|
logger.Info("Connector resources synced")
|
|
cn.Status.IsExitNode = cn.Spec.ExitNode
|
|
if cn.Spec.SubnetRouter != nil {
|
|
cn.Status.SubnetRoutes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
cn.Status.SubnetRoutes = ""
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
|
|
// maybeProvisionConnector ensures that any new resources required for this
|
|
// Connector instance are deployed to the cluster.
|
|
func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) error {
|
|
hostname := cn.Name + "-connector"
|
|
if cn.Spec.Hostname != "" {
|
|
hostname = string(cn.Spec.Hostname)
|
|
}
|
|
crl := childResourceLabels(cn.Name, a.tsnamespace, "connector")
|
|
|
|
proxyClass := cn.Spec.ProxyClass
|
|
if proxyClass != "" {
|
|
if ready, err := proxyClassIsReady(ctx, proxyClass, a.Client); err != nil {
|
|
return fmt.Errorf("error verifying ProxyClass for Connector: %w", err)
|
|
} else if !ready {
|
|
logger.Infof("ProxyClass %s specified for the Connector, but is not (yet) Ready, waiting..", proxyClass)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
sts := &tailscaleSTSConfig{
|
|
ParentResourceName: cn.Name,
|
|
ParentResourceUID: string(cn.UID),
|
|
Hostname: hostname,
|
|
ChildResourceLabels: crl,
|
|
Tags: cn.Spec.Tags.Stringify(),
|
|
Connector: &connector{
|
|
isExitNode: cn.Spec.ExitNode,
|
|
},
|
|
ProxyClass: proxyClass,
|
|
}
|
|
|
|
if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
|
|
sts.Connector.routes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
|
}
|
|
|
|
a.mu.Lock()
|
|
if sts.Connector.isExitNode {
|
|
a.exitNodes.Add(cn.UID)
|
|
} else {
|
|
a.exitNodes.Remove(cn.UID)
|
|
}
|
|
if sts.Connector.routes != "" {
|
|
a.subnetRouters.Add(cn.GetUID())
|
|
} else {
|
|
a.subnetRouters.Remove(cn.GetUID())
|
|
}
|
|
a.mu.Unlock()
|
|
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
|
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
|
var connectors set.Slice[types.UID]
|
|
connectors.AddSlice(a.exitNodes.Slice())
|
|
connectors.AddSlice(a.subnetRouters.Slice())
|
|
gaugeConnectorResources.Set(int64(connectors.Len()))
|
|
|
|
_, err := a.ssr.Provision(ctx, logger, sts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
_, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if tsHost == "" {
|
|
logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth")
|
|
// No hostname yet. Wait for the connector pod to auth.
|
|
cn.Status.TailnetIPs = nil
|
|
cn.Status.Hostname = ""
|
|
return nil
|
|
}
|
|
|
|
cn.Status.TailnetIPs = ips
|
|
cn.Status.Hostname = tsHost
|
|
|
|
return nil
|
|
}
|
|
|
|
func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
|
|
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector")); err != nil {
|
|
return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
|
|
} else if !done {
|
|
logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
|
|
return false, nil
|
|
}
|
|
|
|
// Unlike most log entries in the reconcile loop, this will get printed
|
|
// exactly once at the very end of cleanup, because the final step of
|
|
// cleanup removes the tailscale finalizer, which will make all future
|
|
// reconciles exit early.
|
|
logger.Infof("cleaned up Connector resources")
|
|
a.mu.Lock()
|
|
a.subnetRouters.Remove(cn.UID)
|
|
a.exitNodes.Remove(cn.UID)
|
|
a.mu.Unlock()
|
|
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
|
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
|
var connectors set.Slice[types.UID]
|
|
connectors.AddSlice(a.exitNodes.Slice())
|
|
connectors.AddSlice(a.subnetRouters.Slice())
|
|
gaugeConnectorResources.Set(int64(connectors.Len()))
|
|
return true, nil
|
|
}
|
|
|
|
func (a *ConnectorReconciler) validate(cn *tsapi.Connector) error {
|
|
// Connector fields are already validated at apply time with CEL validation
|
|
// on custom resource fields. The checks here are a backup in case the
|
|
// CEL validation breaks without us noticing.
|
|
if !(cn.Spec.SubnetRouter != nil || cn.Spec.ExitNode) {
|
|
return errors.New("invalid spec: a Connector must expose subnet routes or act as an exit node (or both)")
|
|
}
|
|
if cn.Spec.SubnetRouter == nil {
|
|
return nil
|
|
}
|
|
return validateSubnetRouter(cn.Spec.SubnetRouter)
|
|
}
|
|
|
|
func validateSubnetRouter(sb *tsapi.SubnetRouter) error {
|
|
if len(sb.AdvertiseRoutes) < 1 {
|
|
return errors.New("invalid subnet router spec: no routes defined")
|
|
}
|
|
var err error
|
|
for _, route := range sb.AdvertiseRoutes {
|
|
pfx, e := netip.ParsePrefix(string(route))
|
|
if e != nil {
|
|
err = errors.Wrap(err, fmt.Sprintf("route %s is invalid: %v", route, err))
|
|
continue
|
|
}
|
|
if pfx.Masked() != pfx {
|
|
err = errors.Wrap(err, fmt.Sprintf("route %s has non-address bits set; expected %s", pfx, pfx.Masked()))
|
|
}
|
|
}
|
|
return err
|
|
}
|