diff --git a/cmd/containerboot/forwarding.go b/cmd/containerboot/forwarding.go
new file mode 100644
index 000000000..050bf31c7
--- /dev/null
+++ b/cmd/containerboot/forwarding.go
@@ -0,0 +1,262 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"net"
+	"net/netip"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"tailscale.com/util/linuxfw"
+)
+
+// ensureIPForwarding enables IPv4/IPv6 forwarding for the container.
+func ensureIPForwarding(root, clusterProxyTargetIP, tailnetTargetIP, tailnetTargetFQDN string, routes *string) error {
+	var v4Forwarding, v6Forwarding bool
+	if clusterProxyTargetIP != "" {
+		proxyIP, err := netip.ParseAddr(clusterProxyTargetIP)
+		if err != nil {
+			return fmt.Errorf("invalid cluster destination IP: %w", err)
+		}
+		if proxyIP.Is4() {
+			v4Forwarding = true
+		} else {
+			v6Forwarding = true
+		}
+	}
+	if tailnetTargetIP != "" {
+		proxyIP, err := netip.ParseAddr(tailnetTargetIP)
+		if err != nil {
+			return fmt.Errorf("invalid tailnet destination IP: %w", err)
+		}
+		if proxyIP.Is4() {
+			v4Forwarding = true
+		} else {
+			v6Forwarding = true
+		}
+	}
+	// Currently we only proxy traffic to the IPv4 address of the tailnet
+	// target.
+	if tailnetTargetFQDN != "" {
+		v4Forwarding = true
+	}
+	if routes != nil && *routes != "" {
+		for _, route := range strings.Split(*routes, ",") {
+			cidr, err := netip.ParsePrefix(route)
+			if err != nil {
+				return fmt.Errorf("invalid subnet route: %w", err)
+			}
+			if cidr.Addr().Is4() {
+				v4Forwarding = true
+			} else {
+				v6Forwarding = true
+			}
+		}
+	}
+	return enableIPForwarding(v4Forwarding, v6Forwarding, root)
+}
+
+// enableIPForwarding sets the requested IPv4/IPv6 forwarding sysctls under root to 1 unless they already read 1.
+func enableIPForwarding(v4Forwarding, v6Forwarding bool, root string) error {
+	var paths []string
+	if v4Forwarding {
+		paths = append(paths, filepath.Join(root, "proc/sys/net/ipv4/ip_forward"))
+	}
+	if v6Forwarding {
+		paths = append(paths, filepath.Join(root, "proc/sys/net/ipv6/conf/all/forwarding"))
+	}
+
+	// In some common configurations (e.g. default docker,
+	// kubernetes), the container environment denies write access to
+	// most sysctls, including IP forwarding controls. Check the
+	// sysctl values before trying to change them, so that we
+	// gracefully do nothing if the container's already been set up
+	// properly by e.g. a k8s initContainer.
+	for _, path := range paths {
+		bs, err := os.ReadFile(path)
+		if err != nil {
+			return fmt.Errorf("reading %q: %w", path, err)
+		}
+		if v := strings.TrimSpace(string(bs)); v != "1" {
+			if err := os.WriteFile(path, []byte("1"), 0644); err != nil {
+				return fmt.Errorf("enabling %q: %w", path, err)
+			}
+		}
+	}
+	return nil
+}
+
+// installEgressForwardingRule installs DNAT, SNAT and MSS clamping rules for the egress destination dstStr.
+func installEgressForwardingRule(_ context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	dst, err := netip.ParseAddr(dstStr)
+	if err != nil {
+		return err
+	}
+	var local netip.Addr
+	for _, pfx := range tsIPs {
+		if !pfx.IsSingleIP() {
+			continue
+		}
+		if pfx.Addr().Is4() != dst.Is4() {
+			continue
+		}
+		local = pfx.Addr()
+		break
+	}
+	if !local.IsValid() {
+		return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
+	}
+	if err := nfr.DNATNonTailscaleTraffic("tailscale0", dst); err != nil {
+		return fmt.Errorf("installing egress proxy rules: %w", err)
+	}
+	if err := nfr.AddSNATRuleForDst(local, dst); err != nil {
+		return fmt.Errorf("installing egress proxy rules: %w", err)
+	}
+	if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
+		return fmt.Errorf("installing egress proxy rules: %w", err)
+	}
+	return nil
+}
+
+// installTSForwardingRuleForDestination accepts a destination address and a
+// list of node's tailnet addresses, sets up rules to forward traffic for
+// destination to the tailnet IP matching the destination IP family.
+// Destination can be Pod IP of this node.
+func installTSForwardingRuleForDestination(_ context.Context, dstFilter string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	dst, err := netip.ParseAddr(dstFilter)
+	if err != nil {
+		return err
+	}
+	var local netip.Addr
+	for _, pfx := range tsIPs {
+		if !pfx.IsSingleIP() {
+			continue
+		}
+		if pfx.Addr().Is4() != dst.Is4() {
+			continue
+		}
+		local = pfx.Addr()
+		break
+	}
+	if !local.IsValid() {
+		return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstFilter, tsIPs)
+	}
+	if err := nfr.AddDNATRule(dst, local); err != nil {
+		return fmt.Errorf("installing rule for forwarding traffic to tailnet IP: %w", err)
+	}
+	return nil
+}
+
+// installIngressForwardingRule forwards traffic for the node's tailnet IP of dstStr's family to dstStr.
+func installIngressForwardingRule(_ context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	dst, err := netip.ParseAddr(dstStr)
+	if err != nil {
+		return err
+	}
+	var local netip.Addr
+	proxyHasIPv4Address := false
+	for _, pfx := range tsIPs {
+		if !pfx.IsSingleIP() {
+			continue
+		}
+		if pfx.Addr().Is4() {
+			proxyHasIPv4Address = true
+		}
+		if pfx.Addr().Is4() != dst.Is4() {
+			continue
+		}
+		local = pfx.Addr()
+		break
+	}
+	if proxyHasIPv4Address && dst.Is6() {
+		log.Printf("Warning: proxy backend ClusterIP is an IPv6 address and the proxy has a IPv4 tailnet address. You might need to disable IPv4 address allocation for the proxy for forwarding to work. See https://github.com/tailscale/tailscale/issues/12156")
+	}
+	if !local.IsValid() {
+		return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
+	}
+	if err := nfr.AddDNATRule(local, dst); err != nil {
+		return fmt.Errorf("installing ingress proxy rules: %w", err)
+	}
+	if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
+		return fmt.Errorf("installing ingress proxy rules: %w", err)
+	}
+	return nil
+}
+
+// installIngressForwardingRuleForDNSTarget load-balances traffic for the node's tailnet IPs across backendAddrs, per IP family.
+func installIngressForwardingRuleForDNSTarget(_ context.Context, backendAddrs []net.IP, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	var (
+		tsv4, tsv6             netip.Addr
+		v4Backends, v6Backends []netip.Addr
+	)
+	for _, pfx := range tsIPs {
+		if pfx.IsSingleIP() && pfx.Addr().Is4() {
+			tsv4 = pfx.Addr()
+			continue
+		}
+		if pfx.IsSingleIP() && pfx.Addr().Is6() {
+			tsv6 = pfx.Addr()
+			continue
+		}
+	}
+	// TODO: log if more than one backend address is found and firewall is
+	// in nftables mode that only the first IP will be used.
+	for _, ip := range backendAddrs {
+		// To16 is non-nil for IPv4 too; put each address in exactly one family.
+		if ip4 := ip.To4(); ip4 != nil {
+			v4Backends = append(v4Backends, netip.AddrFrom4([4]byte(ip4)))
+		} else if ip16 := ip.To16(); ip16 != nil {
+			v6Backends = append(v6Backends, netip.AddrFrom16([16]byte(ip16)))
+		}
+	}
+
+	// Enable IP forwarding here as opposed to at the start of containerboot
+	// as the IPv4/IPv6 requirements might have changed.
+	// For Kubernetes operator proxies, forwarding for both IPv4 and IPv6 is
+	// enabled by an init container, so in practice enabling forwarding here
+	// is only needed if this proxy has been configured by manually setting
+	// TS_EXPERIMENTAL_DEST_DNS_NAME env var for a containerboot instance.
+	if err := enableIPForwarding(len(v4Backends) != 0, len(v6Backends) != 0, ""); err != nil {
+		log.Printf("[unexpected] failed to ensure IP forwarding: %v", err)
+	}
+
+	updateFirewall := func(dst netip.Addr, backendTargets []netip.Addr) error {
+		if err := nfr.DNATWithLoadBalancer(dst, backendTargets); err != nil {
+			return fmt.Errorf("installing DNAT rules for ingress backends %+#v: %w", backendTargets, err)
+		}
+		// The backend might advertise MSS higher than that of the
+		// tailscale interfaces. Clamp MSS of packets going out via
+		// tailscale0 interface to its MTU to prevent broken connections
+		// in environments where path MTU discovery is not working.
+		if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
+			return fmt.Errorf("adding rule to clamp traffic via tailscale0: %w", err)
+		}
+		return nil
+	}
+
+	if len(v4Backends) != 0 {
+		if !tsv4.IsValid() {
+			log.Printf("backend targets %v contain at least one IPv4 address, but this node's Tailscale IPs do not contain a valid IPv4 address: %v", backendAddrs, tsIPs)
+		} else if err := updateFirewall(tsv4, v4Backends); err != nil {
+			return fmt.Errorf("Installing IPv4 firewall rules: %w", err)
+		}
+	}
+	if len(v6Backends) != 0 {
+		if !tsv6.IsValid() {
+			log.Printf("backend targets %v contain at least one IPv6 address, but this node's Tailscale IPs do not contain a valid IPv6 address: %v", backendAddrs, tsIPs)
+		} else if !nfr.HasIPV6NAT() {
+			log.Printf("backend targets %v contain at least one IPv6 address, but the chosen firewall mode does not support IPv6 NAT", backendAddrs)
+		} else if err := updateFirewall(tsv6, v6Backends); err != nil {
+			return fmt.Errorf("Installing IPv6 firewall rules: %w", err)
+		}
+	}
+	return nil
+}
diff --git a/cmd/containerboot/healthz.go b/cmd/containerboot/healthz.go
new file mode 100644
index 000000000..fb7fccd96
--- /dev/null
+++ b/cmd/containerboot/healthz.go
@@ -0,0 +1,51 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier:
BSD-3-Clause
+
+//go:build linux
+
+package main
+
+import (
+	"log"
+	"net"
+	"net/http"
+	"sync"
+)
+
+// healthz is a simple health check server: its ServeHTTP method returns 200
+// OK if this tailscale node currently has at least one tailnet IP address,
+// else 503 Service Unavailable. The embedded Mutex guards hasAddrs.
+type healthz struct {
+	sync.Mutex
+	hasAddrs bool
+}
+
+func (h *healthz) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	h.Lock()
+	defer h.Unlock()
+	if h.hasAddrs {
+		w.Write([]byte("ok"))
+	} else {
+		http.Error(w, "node currently has no tailscale IPs", http.StatusServiceUnavailable)
+	}
+}
+
+// runHealthz runs a simple HTTP health endpoint on /healthz, listening on the
+// provided address. A containerized tailscale instance is considered healthy if
+// it has at least one tailnet IP address.
+func runHealthz(addr string, h *healthz) {
+	lis, err := net.Listen("tcp", addr)
+	if err != nil {
+		log.Fatalf("error listening on the provided health endpoint address %q: %v", addr, err)
+	}
+	mux := http.NewServeMux()
+	mux.Handle("/healthz", h)
+	log.Printf("Running healthcheck endpoint at %s/healthz", addr)
+	hs := &http.Server{Handler: mux}
+
+	go func() {
+		if err := hs.Serve(lis); err != nil {
+			log.Fatalf("failed running health endpoint: %v", err)
+		}
+	}()
+}
diff --git a/cmd/containerboot/kube.go b/cmd/containerboot/kube.go
index ec2d3ef12..908cc01ef 100644
--- a/cmd/containerboot/kube.go
+++ b/cmd/containerboot/kube.go
@@ -8,7 +8,6 @@ package main
 import (
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"log"
 	"net/http"
@@ -75,56 +74,6 @@ func deleteAuthKey(ctx context.Context, secretName string) error {
 
 var kc kubeclient.Client
 
-// setupKube is responsible for doing any necessary configuration and checks to
-// ensure that tailscale state storage and authentication mechanism will work on
-// Kubernetes.
-func (cfg *settings) setupKube(ctx context.Context) error { - if cfg.KubeSecret == "" { - return nil - } - canPatch, canCreate, err := kc.CheckSecretPermissions(ctx, cfg.KubeSecret) - if err != nil { - return fmt.Errorf("Some Kubernetes permissions are missing, please check your RBAC configuration: %v", err) - } - cfg.KubernetesCanPatch = canPatch - - s, err := kc.GetSecret(ctx, cfg.KubeSecret) - if err != nil && kubeclient.IsNotFoundErr(err) && !canCreate { - return fmt.Errorf("Tailscale state Secret %s does not exist and we don't have permissions to create it. "+ - "If you intend to store tailscale state elsewhere than a Kubernetes Secret, "+ - "you can explicitly set TS_KUBE_SECRET env var to an empty string. "+ - "Else ensure that RBAC is set up that allows the service account associated with this installation to create Secrets.", cfg.KubeSecret) - } else if err != nil && !kubeclient.IsNotFoundErr(err) { - return fmt.Errorf("Getting Tailscale state Secret %s: %v", cfg.KubeSecret, err) - } - - if cfg.AuthKey == "" && !isOneStepConfig(cfg) { - if s == nil { - log.Print("TS_AUTHKEY not provided and kube secret does not exist, login will be interactive if needed.") - return nil - } - keyBytes, _ := s.Data["authkey"] - key := string(keyBytes) - - if key != "" { - // This behavior of pulling authkeys from kube secrets was added - // at the same time as the patch permission, so we can enforce - // that we must be able to patch out the authkey after - // authenticating if you want to use this feature. This avoids - // us having to deal with the case where we might leave behind - // an unnecessary reusable authkey in a secret, like a rake in - // the grass. 
- if !cfg.KubernetesCanPatch { - return errors.New("authkey found in TS_KUBE_SECRET, but the pod doesn't have patch permissions on the secret to manage the authkey.") - } - cfg.AuthKey = key - } else { - log.Print("No authkey found in kube secret and TS_AUTHKEY not provided, login will be interactive if needed.") - } - } - return nil -} - func initKubeClient(root string) { if root != "/" { // If we are running in a test, we need to set the root path to the fake diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index fdf71c3ea..720eb278b 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -92,36 +92,28 @@ package main import ( - "bytes" "context" - "encoding/json" "errors" "fmt" "io/fs" "log" "math" "net" - "net/http" "net/netip" "os" - "os/exec" "os/signal" "path" "path/filepath" - "reflect" "slices" - "strconv" "strings" "sync" "sync/atomic" "syscall" "time" - "github.com/fsnotify/fsnotify" "golang.org/x/sys/unix" "tailscale.com/client/tailscale" "tailscale.com/ipn" - "tailscale.com/ipn/conffile" kubeutils "tailscale.com/k8s-operator" "tailscale.com/tailcfg" "tailscale.com/types/logger" @@ -650,221 +642,6 @@ runLoop: wg.Wait() } -// watchServeConfigChanges watches path for changes, and when it sees one, reads -// the serve config from it, replacing ${TS_CERT_DOMAIN} with certDomain, and -// applies it to lc. It exits when ctx is canceled. cdChanged is a channel that -// is written to when the certDomain changes, causing the serve config to be -// re-read and applied. 
-func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient) { - if certDomainAtomic == nil { - panic("cd must not be nil") - } - var tickChan <-chan time.Time - var eventChan <-chan fsnotify.Event - if w, err := fsnotify.NewWatcher(); err != nil { - log.Printf("failed to create fsnotify watcher, timer-only mode: %v", err) - ticker := time.NewTicker(5 * time.Second) - defer ticker.Stop() - tickChan = ticker.C - } else { - defer w.Close() - if err := w.Add(filepath.Dir(path)); err != nil { - log.Fatalf("failed to add fsnotify watch: %v", err) - } - eventChan = w.Events - } - - var certDomain string - var prevServeConfig *ipn.ServeConfig - for { - select { - case <-ctx.Done(): - return - case <-cdChanged: - certDomain = *certDomainAtomic.Load() - case <-tickChan: - case <-eventChan: - // We can't do any reasonable filtering on the event because of how - // k8s handles these mounts. So just re-read the file and apply it - // if it's changed. - } - if certDomain == "" { - continue - } - sc, err := readServeConfig(path, certDomain) - if err != nil { - log.Fatalf("failed to read serve config: %v", err) - } - if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) { - continue - } - log.Printf("Applying serve config") - if err := lc.SetServeConfig(ctx, sc); err != nil { - log.Fatalf("failed to set serve config: %v", err) - } - prevServeConfig = sc - } -} - -// readServeConfig reads the ipn.ServeConfig from path, replacing -// ${TS_CERT_DOMAIN} with certDomain. 
-func readServeConfig(path, certDomain string) (*ipn.ServeConfig, error) { - if path == "" { - return nil, nil - } - j, err := os.ReadFile(path) - if err != nil { - return nil, err - } - j = bytes.ReplaceAll(j, []byte("${TS_CERT_DOMAIN}"), []byte(certDomain)) - var sc ipn.ServeConfig - if err := json.Unmarshal(j, &sc); err != nil { - return nil, err - } - return &sc, nil -} - -func startTailscaled(ctx context.Context, cfg *settings) (*tailscale.LocalClient, *os.Process, error) { - args := tailscaledArgs(cfg) - // tailscaled runs without context, since it needs to persist - // beyond the startup timeout in ctx. - cmd := exec.Command("tailscaled", args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - log.Printf("Starting tailscaled") - if err := cmd.Start(); err != nil { - return nil, nil, fmt.Errorf("starting tailscaled failed: %v", err) - } - - // Wait for the socket file to appear, otherwise API ops will racily fail. - log.Printf("Waiting for tailscaled socket") - for { - if ctx.Err() != nil { - log.Fatalf("Timed out waiting for tailscaled socket") - } - _, err := os.Stat(cfg.Socket) - if errors.Is(err, fs.ErrNotExist) { - time.Sleep(100 * time.Millisecond) - continue - } else if err != nil { - log.Fatalf("Waiting for tailscaled socket: %v", err) - } - break - } - - tsClient := &tailscale.LocalClient{ - Socket: cfg.Socket, - UseSocketOnly: true, - } - - return tsClient, cmd.Process, nil -} - -// tailscaledArgs uses cfg to construct the argv for tailscaled. 
-func tailscaledArgs(cfg *settings) []string { - args := []string{"--socket=" + cfg.Socket} - switch { - case cfg.InKubernetes && cfg.KubeSecret != "": - args = append(args, "--state=kube:"+cfg.KubeSecret) - if cfg.StateDir == "" { - cfg.StateDir = "/tmp" - } - fallthrough - case cfg.StateDir != "": - args = append(args, "--statedir="+cfg.StateDir) - default: - args = append(args, "--state=mem:", "--statedir=/tmp") - } - - if cfg.UserspaceMode { - args = append(args, "--tun=userspace-networking") - } else if err := ensureTunFile(cfg.Root); err != nil { - log.Fatalf("ensuring that /dev/net/tun exists: %v", err) - } - - if cfg.SOCKSProxyAddr != "" { - args = append(args, "--socks5-server="+cfg.SOCKSProxyAddr) - } - if cfg.HTTPProxyAddr != "" { - args = append(args, "--outbound-http-proxy-listen="+cfg.HTTPProxyAddr) - } - if cfg.TailscaledConfigFilePath != "" { - args = append(args, "--config="+cfg.TailscaledConfigFilePath) - } - if cfg.DaemonExtraArgs != "" { - args = append(args, strings.Fields(cfg.DaemonExtraArgs)...) - } - return args -} - -// tailscaleUp uses cfg to run 'tailscale up' everytime containerboot starts, or -// if TS_AUTH_ONCE is set, only the first time containerboot starts. -func tailscaleUp(ctx context.Context, cfg *settings) error { - args := []string{"--socket=" + cfg.Socket, "up"} - if cfg.AcceptDNS != nil && *cfg.AcceptDNS { - args = append(args, "--accept-dns=true") - } else { - args = append(args, "--accept-dns=false") - } - if cfg.AuthKey != "" { - args = append(args, "--authkey="+cfg.AuthKey) - } - // --advertise-routes can be passed an empty string to configure a - // device (that might have previously advertised subnet routes) to not - // advertise any routes. Respect an empty string passed by a user and - // use it to explicitly unset the routes. 
- if cfg.Routes != nil { - args = append(args, "--advertise-routes="+*cfg.Routes) - } - if cfg.Hostname != "" { - args = append(args, "--hostname="+cfg.Hostname) - } - if cfg.ExtraArgs != "" { - args = append(args, strings.Fields(cfg.ExtraArgs)...) - } - log.Printf("Running 'tailscale up'") - cmd := exec.CommandContext(ctx, "tailscale", args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("tailscale up failed: %v", err) - } - return nil -} - -// tailscaleSet uses cfg to run 'tailscale set' to set any known configuration -// options that are passed in via environment variables. This is run after the -// node is in Running state and only if TS_AUTH_ONCE is set. -func tailscaleSet(ctx context.Context, cfg *settings) error { - args := []string{"--socket=" + cfg.Socket, "set"} - if cfg.AcceptDNS != nil && *cfg.AcceptDNS { - args = append(args, "--accept-dns=true") - } else { - args = append(args, "--accept-dns=false") - } - // --advertise-routes can be passed an empty string to configure a - // device (that might have previously advertised subnet routes) to not - // advertise any routes. Respect an empty string passed by a user and - // use it to explicitly unset the routes. - if cfg.Routes != nil { - args = append(args, "--advertise-routes="+*cfg.Routes) - } - if cfg.Hostname != "" { - args = append(args, "--hostname="+cfg.Hostname) - } - log.Printf("Running 'tailscale set'") - cmd := exec.CommandContext(ctx, "tailscale", args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("tailscale set failed: %v", err) - } - return nil -} - // ensureTunFile checks that /dev/net/tun exists, creating it if // missing. func ensureTunFile(root string) error { @@ -884,350 +661,6 @@ func ensureTunFile(root string) error { return nil } -// ensureIPForwarding enables IPv4/IPv6 forwarding for the container. 
-func ensureIPForwarding(root, clusterProxyTargetIP, tailnetTargetIP, tailnetTargetFQDN string, routes *string) error { - var ( - v4Forwarding, v6Forwarding bool - ) - if clusterProxyTargetIP != "" { - proxyIP, err := netip.ParseAddr(clusterProxyTargetIP) - if err != nil { - return fmt.Errorf("invalid cluster destination IP: %v", err) - } - if proxyIP.Is4() { - v4Forwarding = true - } else { - v6Forwarding = true - } - } - if tailnetTargetIP != "" { - proxyIP, err := netip.ParseAddr(tailnetTargetIP) - if err != nil { - return fmt.Errorf("invalid tailnet destination IP: %v", err) - } - if proxyIP.Is4() { - v4Forwarding = true - } else { - v6Forwarding = true - } - } - // Currently we only proxy traffic to the IPv4 address of the tailnet - // target. - if tailnetTargetFQDN != "" { - v4Forwarding = true - } - if routes != nil && *routes != "" { - for _, route := range strings.Split(*routes, ",") { - cidr, err := netip.ParsePrefix(route) - if err != nil { - return fmt.Errorf("invalid subnet route: %v", err) - } - if cidr.Addr().Is4() { - v4Forwarding = true - } else { - v6Forwarding = true - } - } - } - return enableIPForwarding(v4Forwarding, v6Forwarding, root) -} - -func enableIPForwarding(v4Forwarding, v6Forwarding bool, root string) error { - var paths []string - if v4Forwarding { - paths = append(paths, filepath.Join(root, "proc/sys/net/ipv4/ip_forward")) - } - if v6Forwarding { - paths = append(paths, filepath.Join(root, "proc/sys/net/ipv6/conf/all/forwarding")) - } - - // In some common configurations (e.g. default docker, - // kubernetes), the container environment denies write access to - // most sysctls, including IP forwarding controls. Check the - // sysctl values before trying to change them, so that we - // gracefully do nothing if the container's already been set up - // properly by e.g. a k8s initContainer. 
- for _, path := range paths { - bs, err := os.ReadFile(path) - if err != nil { - return fmt.Errorf("reading %q: %w", path, err) - } - if v := strings.TrimSpace(string(bs)); v != "1" { - if err := os.WriteFile(path, []byte("1"), 0644); err != nil { - return fmt.Errorf("enabling %q: %w", path, err) - } - } - } - return nil -} - -func installEgressForwardingRule(_ context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { - dst, err := netip.ParseAddr(dstStr) - if err != nil { - return err - } - var local netip.Addr - for _, pfx := range tsIPs { - if !pfx.IsSingleIP() { - continue - } - if pfx.Addr().Is4() != dst.Is4() { - continue - } - local = pfx.Addr() - break - } - if !local.IsValid() { - return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) - } - if err := nfr.DNATNonTailscaleTraffic("tailscale0", dst); err != nil { - return fmt.Errorf("installing egress proxy rules: %w", err) - } - if err := nfr.AddSNATRuleForDst(local, dst); err != nil { - return fmt.Errorf("installing egress proxy rules: %w", err) - } - if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { - return fmt.Errorf("installing egress proxy rules: %w", err) - } - return nil -} - -// installTSForwardingRuleForDestination accepts a destination address and a -// list of node's tailnet addresses, sets up rules to forward traffic for -// destination to the tailnet IP matching the destination IP family. -// Destination can be Pod IP of this node. 
-func installTSForwardingRuleForDestination(ctx context.Context, dstFilter string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { - dst, err := netip.ParseAddr(dstFilter) - if err != nil { - return err - } - var local netip.Addr - for _, pfx := range tsIPs { - if !pfx.IsSingleIP() { - continue - } - if pfx.Addr().Is4() != dst.Is4() { - continue - } - local = pfx.Addr() - break - } - if !local.IsValid() { - return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstFilter, tsIPs) - } - if err := nfr.AddDNATRule(dst, local); err != nil { - return fmt.Errorf("installing rule for forwarding traffic to tailnet IP: %w", err) - } - return nil -} - -func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { - dst, err := netip.ParseAddr(dstStr) - if err != nil { - return err - } - var local netip.Addr - proxyHasIPv4Address := false - for _, pfx := range tsIPs { - if !pfx.IsSingleIP() { - continue - } - if pfx.Addr().Is4() { - proxyHasIPv4Address = true - } - if pfx.Addr().Is4() != dst.Is4() { - continue - } - local = pfx.Addr() - break - } - if proxyHasIPv4Address && dst.Is6() { - log.Printf("Warning: proxy backend ClusterIP is an IPv6 address and the proxy has a IPv4 tailnet address. You might need to disable IPv4 address allocation for the proxy for forwarding to work. 
See https://github.com/tailscale/tailscale/issues/12156") - } - if !local.IsValid() { - return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) - } - if err := nfr.AddDNATRule(local, dst); err != nil { - return fmt.Errorf("installing ingress proxy rules: %w", err) - } - if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { - return fmt.Errorf("installing ingress proxy rules: %w", err) - } - return nil -} - -func installIngressForwardingRuleForDNSTarget(ctx context.Context, backendAddrs []net.IP, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { - var ( - tsv4 netip.Addr - tsv6 netip.Addr - v4Backends []netip.Addr - v6Backends []netip.Addr - ) - for _, pfx := range tsIPs { - if pfx.IsSingleIP() && pfx.Addr().Is4() { - tsv4 = pfx.Addr() - continue - } - if pfx.IsSingleIP() && pfx.Addr().Is6() { - tsv6 = pfx.Addr() - continue - } - } - // TODO: log if more than one backend address is found and firewall is - // in nftables mode that only the first IP will be used. - for _, ip := range backendAddrs { - if ip.To4() != nil { - v4Backends = append(v4Backends, netip.AddrFrom4([4]byte(ip.To4()))) - } - if ip.To16() != nil { - v6Backends = append(v6Backends, netip.AddrFrom16([16]byte(ip.To16()))) - } - } - - // Enable IP forwarding here as opposed to at the start of containerboot - // as the IPv4/IPv6 requirements might have changed. - // For Kubernetes operator proxies, forwarding for both IPv4 and IPv6 is - // enabled by an init container, so in practice enabling forwarding here - // is only needed if this proxy has been configured by manually setting - // TS_EXPERIMENTAL_DEST_DNS_NAME env var for a containerboot instance. 
- if err := enableIPForwarding(len(v4Backends) != 0, len(v6Backends) != 0, ""); err != nil { - log.Printf("[unexpected] failed to ensure IP forwarding: %v", err) - } - - updateFirewall := func(dst netip.Addr, backendTargets []netip.Addr) error { - if err := nfr.DNATWithLoadBalancer(dst, backendTargets); err != nil { - return fmt.Errorf("installing DNAT rules for ingress backends %+#v: %w", backendTargets, err) - } - // The backend might advertize MSS higher than that of the - // tailscale interfaces. Clamp MSS of packets going out via - // tailscale0 interface to its MTU to prevent broken connections - // in environments where path MTU discovery is not working. - if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { - return fmt.Errorf("adding rule to clamp traffic via tailscale0: %v", err) - } - return nil - } - - if len(v4Backends) != 0 { - if !tsv4.IsValid() { - log.Printf("backend targets %v contain at least one IPv4 address, but this node's Tailscale IPs do not contain a valid IPv4 address: %v", backendAddrs, tsIPs) - } else if err := updateFirewall(tsv4, v4Backends); err != nil { - return fmt.Errorf("Installing IPv4 firewall rules: %w", err) - } - } - if len(v6Backends) != 0 && !tsv6.IsValid() { - if !tsv6.IsValid() { - log.Printf("backend targets %v contain at least one IPv6 address, but this node's Tailscale IPs do not contain a valid IPv6 address: %v", backendAddrs, tsIPs) - } else if !nfr.HasIPV6NAT() { - log.Printf("backend targets %v contain at least one IPv6 address, but the chosen firewall mode does not support IPv6 NAT", backendAddrs) - } else if err := updateFirewall(tsv6, v6Backends); err != nil { - return fmt.Errorf("Installing IPv6 firewall rules: %w", err) - } - } - return nil -} - -// settings is all the configuration for containerboot. -type settings struct { - AuthKey string - Hostname string - Routes *string - // ProxyTargetIP is the destination IP to which all incoming - // Tailscale traffic should be proxied. 
If empty, no proxying - // is done. This is typically a locally reachable IP. - ProxyTargetIP string - // ProxyTargetDNSName is a DNS name to whose backing IP addresses all - // incoming Tailscale traffic should be proxied. - ProxyTargetDNSName string - // TailnetTargetIP is the destination IP to which all incoming - // non-Tailscale traffic should be proxied. This is typically a - // Tailscale IP. - TailnetTargetIP string - // TailnetTargetFQDN is an MagicDNS name to which all incoming - // non-Tailscale traffic should be proxied. This must be a full Tailnet - // node FQDN. - TailnetTargetFQDN string - ServeConfigPath string - DaemonExtraArgs string - ExtraArgs string - InKubernetes bool - UserspaceMode bool - StateDir string - AcceptDNS *bool - KubeSecret string - SOCKSProxyAddr string - HTTPProxyAddr string - Socket string - AuthOnce bool - Root string - KubernetesCanPatch bool - TailscaledConfigFilePath string - EnableForwardingOptimizations bool - // If set to true and, if this containerboot instance is a Kubernetes - // ingress proxy, set up rules to forward incoming cluster traffic to be - // forwarded to the ingress target in cluster. - AllowProxyingClusterTrafficViaIngress bool - // PodIP is the IP of the Pod if running in Kubernetes. This is used - // when setting up rules to proxy cluster traffic to cluster ingress - // target. - PodIP string - HealthCheckAddrPort string -} - -func (s *settings) validate() error { - if s.TailscaledConfigFilePath != "" { - dir, file := path.Split(s.TailscaledConfigFilePath) - if _, err := os.Stat(dir); err != nil { - return fmt.Errorf("error validating whether directory with tailscaled config file %s exists: %w", dir, err) - } - if _, err := os.Stat(s.TailscaledConfigFilePath); err != nil { - return fmt.Errorf("error validating whether tailscaled config directory %q contains tailscaled config for current capability version %q: %w. 
If this is a Tailscale Kubernetes operator proxy, please ensure that the version of the operator is not older than the version of the proxy", dir, file, err) - } - if _, err := conffile.Load(s.TailscaledConfigFilePath); err != nil { - return fmt.Errorf("error validating tailscaled configfile contents: %w", err) - } - } - if s.ProxyTargetIP != "" && s.UserspaceMode { - return errors.New("TS_DEST_IP is not supported with TS_USERSPACE") - } - if s.ProxyTargetDNSName != "" && s.UserspaceMode { - return errors.New("TS_EXPERIMENTAL_DEST_DNS_NAME is not supported with TS_USERSPACE") - } - if s.ProxyTargetDNSName != "" && s.ProxyTargetIP != "" { - return errors.New("TS_EXPERIMENTAL_DEST_DNS_NAME and TS_DEST_IP cannot both be set") - } - if s.TailnetTargetIP != "" && s.UserspaceMode { - return errors.New("TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE") - } - if s.TailnetTargetFQDN != "" && s.UserspaceMode { - return errors.New("TS_TAILNET_TARGET_FQDN is not supported with TS_USERSPACE") - } - if s.TailnetTargetFQDN != "" && s.TailnetTargetIP != "" { - return errors.New("Both TS_TAILNET_TARGET_IP and TS_TAILNET_FQDN cannot be set") - } - if s.TailscaledConfigFilePath != "" && (s.AcceptDNS != nil || s.AuthKey != "" || s.Routes != nil || s.ExtraArgs != "" || s.Hostname != "") { - return errors.New("TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR cannot be set in combination with TS_HOSTNAME, TS_EXTRA_ARGS, TS_AUTHKEY, TS_ROUTES, TS_ACCEPT_DNS.") - } - if s.AllowProxyingClusterTrafficViaIngress && s.UserspaceMode { - return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is not supported in userspace mode") - } - if s.AllowProxyingClusterTrafficViaIngress && s.ServeConfigPath == "" { - return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is set but this is not a cluster ingress proxy") - } - if s.AllowProxyingClusterTrafficViaIngress && s.PodIP == "" { - return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is set 
but POD_IP is not set") - } - if s.EnableForwardingOptimizations && s.UserspaceMode { - return errors.New("TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS is not supported in userspace mode") - } - if s.HealthCheckAddrPort != "" { - if _, err := netip.ParseAddrPort(s.HealthCheckAddrPort); err != nil { - return fmt.Errorf("error parsing TS_HEALTH_CHECK_ADDR_PORT value %q: %w", s.HealthCheckAddrPort, err) - } - } - return nil -} - func resolveDNS(ctx context.Context, name string) ([]net.IP, error) { // TODO (irbekrm): look at using recursive.Resolver instead to resolve // the DNS names as well as retrieve TTLs. It looks though that this @@ -1250,57 +683,6 @@ func resolveDNS(ctx context.Context, name string) ([]net.IP, error) { return append(ip4s, ip6s...), nil } -// defaultEnv returns the value of the given envvar name, or defVal if -// unset. -func defaultEnv(name, defVal string) string { - if v, ok := os.LookupEnv(name); ok { - return v - } - return defVal -} - -// defaultEnvStringPointer returns a pointer to the given envvar value if set, else -// returns nil. This is useful in cases where we need to distinguish between a -// variable being set to empty string vs unset. -func defaultEnvStringPointer(name string) *string { - if v, ok := os.LookupEnv(name); ok { - return &v - } - return nil -} - -// defaultEnvBoolPointer returns a pointer to the given envvar value if set, else -// returns nil. This is useful in cases where we need to distinguish between a -// variable being explicitly set to false vs unset. -func defaultEnvBoolPointer(name string) *bool { - v := os.Getenv(name) - ret, err := strconv.ParseBool(v) - if err != nil { - return nil - } - return &ret -} - -func defaultEnvs(names []string, defVal string) string { - for _, name := range names { - if v, ok := os.LookupEnv(name); ok { - return v - } - } - return defVal -} - -// defaultBool returns the boolean value of the given envvar name, or -// defVal if unset or not a bool. 
-func defaultBool(name string, defVal bool) bool { - v := os.Getenv(name) - ret, err := strconv.ParseBool(v) - if err != nil { - return defVal - } - return ret -} - // contextWithExitSignalWatch watches for SIGTERM/SIGINT signals. It returns a // context that gets cancelled when a signal is received and a cancel function // that can be called to free the resources when the watch should be stopped. @@ -1323,43 +705,6 @@ func contextWithExitSignalWatch() (context.Context, func()) { return ctx, f } -// isTwoStepConfigAuthOnce returns true if the Tailscale node should be configured -// in two steps and login should only happen once. -// Step 1: run 'tailscaled' -// Step 2): -// A) if this is the first time starting this node run 'tailscale up --authkey ' -// B) if this is not the first time starting this node run 'tailscale set '. -func isTwoStepConfigAuthOnce(cfg *settings) bool { - return cfg.AuthOnce && cfg.TailscaledConfigFilePath == "" -} - -// isTwoStepConfigAlwaysAuth returns true if the Tailscale node should be configured -// in two steps and we should log in every time it starts. -// Step 1: run 'tailscaled' -// Step 2): run 'tailscale up --authkey ' -func isTwoStepConfigAlwaysAuth(cfg *settings) bool { - return !cfg.AuthOnce && cfg.TailscaledConfigFilePath == "" -} - -// isOneStepConfig returns true if the Tailscale node should always be ran and -// configured in a single step by running 'tailscaled ' -func isOneStepConfig(cfg *settings) bool { - return cfg.TailscaledConfigFilePath != "" -} - -// isL3Proxy returns true if the Tailscale node needs to be configured to act -// as an L3 proxy, proxying to an endpoint provided via one of the config env -// vars. -func isL3Proxy(cfg *settings) bool { - return cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress -} - -// hasKubeStateStore returns true if the state must be stored in a Kubernetes -// Secret. 
-func hasKubeStateStore(cfg *settings) bool { - return cfg.InKubernetes && cfg.KubernetesCanPatch && cfg.KubeSecret != "" -} - // tailscaledConfigFilePath returns the path to the tailscaled config file that // should be used for the current capability version. It is determined by the // TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR environment variable and looks for a @@ -1398,41 +743,3 @@ func tailscaledConfigFilePath() string { log.Printf("Using tailscaled config file %q for capability version %q", maxCompatVer, tailcfg.CurrentCapabilityVersion) return path.Join(dir, kubeutils.TailscaledConfigFileNameForCap(maxCompatVer)) } - -// healthz is a simple health check server, if enabled it returns 200 OK if -// this tailscale node currently has at least one tailnet IP address else -// returns 503. -type healthz struct { - sync.Mutex - hasAddrs bool -} - -func (h *healthz) ServeHTTP(w http.ResponseWriter, r *http.Request) { - h.Lock() - defer h.Unlock() - if h.hasAddrs { - w.Write([]byte("ok")) - } else { - http.Error(w, "node currently has no tailscale IPs", http.StatusInternalServerError) - } -} - -// runHealthz runs a simple HTTP health endpoint on /healthz, listening on the -// provided address. A containerized tailscale instance is considered healthy if -// it has at least one tailnet IP address. 
-func runHealthz(addr string, h *healthz) { - lis, err := net.Listen("tcp", addr) - if err != nil { - log.Fatalf("error listening on the provided health endpoint address %q: %v", addr, err) - } - mux := http.NewServeMux() - mux.Handle("/healthz", h) - log.Printf("Running healthcheck endpoint at %s/healthz", addr) - hs := &http.Server{Handler: mux} - - go func() { - if err := hs.Serve(lis); err != nil { - log.Fatalf("failed running health endpoint: %v", err) - } - }() -} diff --git a/cmd/containerboot/serve.go b/cmd/containerboot/serve.go new file mode 100644 index 000000000..6c22b3eeb --- /dev/null +++ b/cmd/containerboot/serve.go @@ -0,0 +1,96 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package main + +import ( + "bytes" + "context" + "encoding/json" + "log" + "os" + "path/filepath" + "reflect" + "sync/atomic" + "time" + + "github.com/fsnotify/fsnotify" + "tailscale.com/client/tailscale" + "tailscale.com/ipn" +) + +// watchServeConfigChanges watches path for changes, and when it sees one, reads +// the serve config from it, replacing ${TS_CERT_DOMAIN} with certDomain, and +// applies it to lc. It exits when ctx is canceled. cdChanged is a channel that +// is written to when the certDomain changes, causing the serve config to be +// re-read and applied. 
+func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient) { + if certDomainAtomic == nil { + panic("cd must not be nil") + } + var tickChan <-chan time.Time + var eventChan <-chan fsnotify.Event + if w, err := fsnotify.NewWatcher(); err != nil { + log.Printf("failed to create fsnotify watcher, timer-only mode: %v", err) + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + tickChan = ticker.C + } else { + defer w.Close() + if err := w.Add(filepath.Dir(path)); err != nil { + log.Fatalf("failed to add fsnotify watch: %v", err) + } + eventChan = w.Events + } + + var certDomain string + var prevServeConfig *ipn.ServeConfig + for { + select { + case <-ctx.Done(): + return + case <-cdChanged: + certDomain = *certDomainAtomic.Load() + case <-tickChan: + case <-eventChan: + // We can't do any reasonable filtering on the event because of how + // k8s handles these mounts. So just re-read the file and apply it + // if it's changed. + } + if certDomain == "" { + continue + } + sc, err := readServeConfig(path, certDomain) + if err != nil { + log.Fatalf("failed to read serve config: %v", err) + } + if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) { + continue + } + log.Printf("Applying serve config") + if err := lc.SetServeConfig(ctx, sc); err != nil { + log.Fatalf("failed to set serve config: %v", err) + } + prevServeConfig = sc + } +} + +// readServeConfig reads the ipn.ServeConfig from path, replacing +// ${TS_CERT_DOMAIN} with certDomain. 
+func readServeConfig(path, certDomain string) (*ipn.ServeConfig, error) { + if path == "" { + return nil, nil + } + j, err := os.ReadFile(path) + if err != nil { + return nil, err + } + j = bytes.ReplaceAll(j, []byte("${TS_CERT_DOMAIN}"), []byte(certDomain)) + var sc ipn.ServeConfig + if err := json.Unmarshal(j, &sc); err != nil { + return nil, err + } + return &sc, nil +} diff --git a/cmd/containerboot/settings.go b/cmd/containerboot/settings.go new file mode 100644 index 000000000..c61996949 --- /dev/null +++ b/cmd/containerboot/settings.go @@ -0,0 +1,259 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package main + +import ( + "context" + "errors" + "fmt" + "log" + "net/netip" + "os" + "path" + "strconv" + + "tailscale.com/ipn/conffile" + "tailscale.com/kube/kubeclient" +) + +// settings is all the configuration for containerboot. +type settings struct { + AuthKey string + Hostname string + Routes *string + // ProxyTargetIP is the destination IP to which all incoming + // Tailscale traffic should be proxied. If empty, no proxying + // is done. This is typically a locally reachable IP. + ProxyTargetIP string + // ProxyTargetDNSName is a DNS name to whose backing IP addresses all + // incoming Tailscale traffic should be proxied. + ProxyTargetDNSName string + // TailnetTargetIP is the destination IP to which all incoming + // non-Tailscale traffic should be proxied. This is typically a + // Tailscale IP. + TailnetTargetIP string + // TailnetTargetFQDN is an MagicDNS name to which all incoming + // non-Tailscale traffic should be proxied. This must be a full Tailnet + // node FQDN. 
+ TailnetTargetFQDN string + ServeConfigPath string + DaemonExtraArgs string + ExtraArgs string + InKubernetes bool + UserspaceMode bool + StateDir string + AcceptDNS *bool + KubeSecret string + SOCKSProxyAddr string + HTTPProxyAddr string + Socket string + AuthOnce bool + Root string + KubernetesCanPatch bool + TailscaledConfigFilePath string + EnableForwardingOptimizations bool + // If set to true and, if this containerboot instance is a Kubernetes + // ingress proxy, set up rules to forward incoming cluster traffic to be + // forwarded to the ingress target in cluster. + AllowProxyingClusterTrafficViaIngress bool + // PodIP is the IP of the Pod if running in Kubernetes. This is used + // when setting up rules to proxy cluster traffic to cluster ingress + // target. + PodIP string + HealthCheckAddrPort string +} + +func (s *settings) validate() error { + if s.TailscaledConfigFilePath != "" { + dir, file := path.Split(s.TailscaledConfigFilePath) + if _, err := os.Stat(dir); err != nil { + return fmt.Errorf("error validating whether directory with tailscaled config file %s exists: %w", dir, err) + } + if _, err := os.Stat(s.TailscaledConfigFilePath); err != nil { + return fmt.Errorf("error validating whether tailscaled config directory %q contains tailscaled config for current capability version %q: %w. 
If this is a Tailscale Kubernetes operator proxy, please ensure that the version of the operator is not older than the version of the proxy", dir, file, err) + } + if _, err := conffile.Load(s.TailscaledConfigFilePath); err != nil { + return fmt.Errorf("error validating tailscaled configfile contents: %w", err) + } + } + if s.ProxyTargetIP != "" && s.UserspaceMode { + return errors.New("TS_DEST_IP is not supported with TS_USERSPACE") + } + if s.ProxyTargetDNSName != "" && s.UserspaceMode { + return errors.New("TS_EXPERIMENTAL_DEST_DNS_NAME is not supported with TS_USERSPACE") + } + if s.ProxyTargetDNSName != "" && s.ProxyTargetIP != "" { + return errors.New("TS_EXPERIMENTAL_DEST_DNS_NAME and TS_DEST_IP cannot both be set") + } + if s.TailnetTargetIP != "" && s.UserspaceMode { + return errors.New("TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE") + } + if s.TailnetTargetFQDN != "" && s.UserspaceMode { + return errors.New("TS_TAILNET_TARGET_FQDN is not supported with TS_USERSPACE") + } + if s.TailnetTargetFQDN != "" && s.TailnetTargetIP != "" { + return errors.New("Both TS_TAILNET_TARGET_IP and TS_TAILNET_FQDN cannot be set") + } + if s.TailscaledConfigFilePath != "" && (s.AcceptDNS != nil || s.AuthKey != "" || s.Routes != nil || s.ExtraArgs != "" || s.Hostname != "") { + return errors.New("TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR cannot be set in combination with TS_HOSTNAME, TS_EXTRA_ARGS, TS_AUTHKEY, TS_ROUTES, TS_ACCEPT_DNS.") + } + if s.AllowProxyingClusterTrafficViaIngress && s.UserspaceMode { + return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is not supported in userspace mode") + } + if s.AllowProxyingClusterTrafficViaIngress && s.ServeConfigPath == "" { + return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is set but this is not a cluster ingress proxy") + } + if s.AllowProxyingClusterTrafficViaIngress && s.PodIP == "" { + return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is set 
but POD_IP is not set") + } + if s.EnableForwardingOptimizations && s.UserspaceMode { + return errors.New("TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS is not supported in userspace mode") + } + if s.HealthCheckAddrPort != "" { + if _, err := netip.ParseAddrPort(s.HealthCheckAddrPort); err != nil { + return fmt.Errorf("error parsing TS_HEALTH_CHECK_ADDR_PORT value %q: %w", s.HealthCheckAddrPort, err) + } + } + return nil +} + +// setupKube is responsible for doing any necessary configuration and checks to +// ensure that tailscale state storage and authentication mechanism will work on +// Kubernetes. +func (cfg *settings) setupKube(ctx context.Context) error { + if cfg.KubeSecret == "" { + return nil + } + canPatch, canCreate, err := kc.CheckSecretPermissions(ctx, cfg.KubeSecret) + if err != nil { + return fmt.Errorf("Some Kubernetes permissions are missing, please check your RBAC configuration: %v", err) + } + cfg.KubernetesCanPatch = canPatch + + s, err := kc.GetSecret(ctx, cfg.KubeSecret) + if err != nil && kubeclient.IsNotFoundErr(err) && !canCreate { + return fmt.Errorf("Tailscale state Secret %s does not exist and we don't have permissions to create it. "+ + "If you intend to store tailscale state elsewhere than a Kubernetes Secret, "+ + "you can explicitly set TS_KUBE_SECRET env var to an empty string. 
"+ + "Else ensure that RBAC is set up that allows the service account associated with this installation to create Secrets.", cfg.KubeSecret) + } else if err != nil && !kubeclient.IsNotFoundErr(err) { + return fmt.Errorf("Getting Tailscale state Secret %s: %v", cfg.KubeSecret, err) + } + + if cfg.AuthKey == "" && !isOneStepConfig(cfg) { + if s == nil { + log.Print("TS_AUTHKEY not provided and kube secret does not exist, login will be interactive if needed.") + return nil + } + keyBytes, _ := s.Data["authkey"] + key := string(keyBytes) + + if key != "" { + // This behavior of pulling authkeys from kube secrets was added + // at the same time as the patch permission, so we can enforce + // that we must be able to patch out the authkey after + // authenticating if you want to use this feature. This avoids + // us having to deal with the case where we might leave behind + // an unnecessary reusable authkey in a secret, like a rake in + // the grass. + if !cfg.KubernetesCanPatch { + return errors.New("authkey found in TS_KUBE_SECRET, but the pod doesn't have patch permissions on the secret to manage the authkey.") + } + cfg.AuthKey = key + } else { + log.Print("No authkey found in kube secret and TS_AUTHKEY not provided, login will be interactive if needed.") + } + } + return nil +} + +// isTwoStepConfigAuthOnce returns true if the Tailscale node should be configured +// in two steps and login should only happen once. +// Step 1: run 'tailscaled' +// Step 2): +// A) if this is the first time starting this node run 'tailscale up --authkey ' +// B) if this is not the first time starting this node run 'tailscale set '. +func isTwoStepConfigAuthOnce(cfg *settings) bool { + return cfg.AuthOnce && cfg.TailscaledConfigFilePath == "" +} + +// isTwoStepConfigAlwaysAuth returns true if the Tailscale node should be configured +// in two steps and we should log in every time it starts. 
+// Step 1: run 'tailscaled' +// Step 2): run 'tailscale up --authkey ' +func isTwoStepConfigAlwaysAuth(cfg *settings) bool { + return !cfg.AuthOnce && cfg.TailscaledConfigFilePath == "" +} + +// isOneStepConfig returns true if the Tailscale node should always be ran and +// configured in a single step by running 'tailscaled ' +func isOneStepConfig(cfg *settings) bool { + return cfg.TailscaledConfigFilePath != "" +} + +// isL3Proxy returns true if the Tailscale node needs to be configured to act +// as an L3 proxy, proxying to an endpoint provided via one of the config env +// vars. +func isL3Proxy(cfg *settings) bool { + return cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress +} + +// hasKubeStateStore returns true if the state must be stored in a Kubernetes +// Secret. +func hasKubeStateStore(cfg *settings) bool { + return cfg.InKubernetes && cfg.KubernetesCanPatch && cfg.KubeSecret != "" +} + +// defaultEnv returns the value of the given envvar name, or defVal if +// unset. +func defaultEnv(name, defVal string) string { + if v, ok := os.LookupEnv(name); ok { + return v + } + return defVal +} + +// defaultEnvStringPointer returns a pointer to the given envvar value if set, else +// returns nil. This is useful in cases where we need to distinguish between a +// variable being set to empty string vs unset. +func defaultEnvStringPointer(name string) *string { + if v, ok := os.LookupEnv(name); ok { + return &v + } + return nil +} + +// defaultEnvBoolPointer returns a pointer to the given envvar value if set, else +// returns nil. This is useful in cases where we need to distinguish between a +// variable being explicitly set to false vs unset. 
+func defaultEnvBoolPointer(name string) *bool { + v := os.Getenv(name) + ret, err := strconv.ParseBool(v) + if err != nil { + return nil + } + return &ret +} + +func defaultEnvs(names []string, defVal string) string { + for _, name := range names { + if v, ok := os.LookupEnv(name); ok { + return v + } + } + return defVal +} + +// defaultBool returns the boolean value of the given envvar name, or +// defVal if unset or not a bool. +func defaultBool(name string, defVal bool) bool { + v := os.Getenv(name) + ret, err := strconv.ParseBool(v) + if err != nil { + return defVal + } + return ret +} diff --git a/cmd/containerboot/tailscaled.go b/cmd/containerboot/tailscaled.go new file mode 100644 index 000000000..53fb7e703 --- /dev/null +++ b/cmd/containerboot/tailscaled.go @@ -0,0 +1,162 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package main + +import ( + "context" + "errors" + "fmt" + "io/fs" + "log" + "os" + "os/exec" + "strings" + "syscall" + "time" + + "tailscale.com/client/tailscale" +) + +func startTailscaled(ctx context.Context, cfg *settings) (*tailscale.LocalClient, *os.Process, error) { + args := tailscaledArgs(cfg) + // tailscaled runs without context, since it needs to persist + // beyond the startup timeout in ctx. + cmd := exec.Command("tailscaled", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + log.Printf("Starting tailscaled") + if err := cmd.Start(); err != nil { + return nil, nil, fmt.Errorf("starting tailscaled failed: %v", err) + } + + // Wait for the socket file to appear, otherwise API ops will racily fail. 
+ log.Printf("Waiting for tailscaled socket") + for { + if ctx.Err() != nil { + log.Fatalf("Timed out waiting for tailscaled socket") + } + _, err := os.Stat(cfg.Socket) + if errors.Is(err, fs.ErrNotExist) { + time.Sleep(100 * time.Millisecond) + continue + } else if err != nil { + log.Fatalf("Waiting for tailscaled socket: %v", err) + } + break + } + + tsClient := &tailscale.LocalClient{ + Socket: cfg.Socket, + UseSocketOnly: true, + } + + return tsClient, cmd.Process, nil +} + +// tailscaledArgs uses cfg to construct the argv for tailscaled. +func tailscaledArgs(cfg *settings) []string { + args := []string{"--socket=" + cfg.Socket} + switch { + case cfg.InKubernetes && cfg.KubeSecret != "": + args = append(args, "--state=kube:"+cfg.KubeSecret) + if cfg.StateDir == "" { + cfg.StateDir = "/tmp" + } + fallthrough + case cfg.StateDir != "": + args = append(args, "--statedir="+cfg.StateDir) + default: + args = append(args, "--state=mem:", "--statedir=/tmp") + } + + if cfg.UserspaceMode { + args = append(args, "--tun=userspace-networking") + } else if err := ensureTunFile(cfg.Root); err != nil { + log.Fatalf("ensuring that /dev/net/tun exists: %v", err) + } + + if cfg.SOCKSProxyAddr != "" { + args = append(args, "--socks5-server="+cfg.SOCKSProxyAddr) + } + if cfg.HTTPProxyAddr != "" { + args = append(args, "--outbound-http-proxy-listen="+cfg.HTTPProxyAddr) + } + if cfg.TailscaledConfigFilePath != "" { + args = append(args, "--config="+cfg.TailscaledConfigFilePath) + } + if cfg.DaemonExtraArgs != "" { + args = append(args, strings.Fields(cfg.DaemonExtraArgs)...) + } + return args +} + +// tailscaleUp uses cfg to run 'tailscale up' everytime containerboot starts, or +// if TS_AUTH_ONCE is set, only the first time containerboot starts. 
+func tailscaleUp(ctx context.Context, cfg *settings) error { + args := []string{"--socket=" + cfg.Socket, "up"} + if cfg.AcceptDNS != nil && *cfg.AcceptDNS { + args = append(args, "--accept-dns=true") + } else { + args = append(args, "--accept-dns=false") + } + if cfg.AuthKey != "" { + args = append(args, "--authkey="+cfg.AuthKey) + } + // --advertise-routes can be passed an empty string to configure a + // device (that might have previously advertised subnet routes) to not + // advertise any routes. Respect an empty string passed by a user and + // use it to explicitly unset the routes. + if cfg.Routes != nil { + args = append(args, "--advertise-routes="+*cfg.Routes) + } + if cfg.Hostname != "" { + args = append(args, "--hostname="+cfg.Hostname) + } + if cfg.ExtraArgs != "" { + args = append(args, strings.Fields(cfg.ExtraArgs)...) + } + log.Printf("Running 'tailscale up'") + cmd := exec.CommandContext(ctx, "tailscale", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("tailscale up failed: %v", err) + } + return nil +} + +// tailscaleSet uses cfg to run 'tailscale set' to set any known configuration +// options that are passed in via environment variables. This is run after the +// node is in Running state and only if TS_AUTH_ONCE is set. +func tailscaleSet(ctx context.Context, cfg *settings) error { + args := []string{"--socket=" + cfg.Socket, "set"} + if cfg.AcceptDNS != nil && *cfg.AcceptDNS { + args = append(args, "--accept-dns=true") + } else { + args = append(args, "--accept-dns=false") + } + // --advertise-routes can be passed an empty string to configure a + // device (that might have previously advertised subnet routes) to not + // advertise any routes. Respect an empty string passed by a user and + // use it to explicitly unset the routes. 
+ if cfg.Routes != nil { + args = append(args, "--advertise-routes="+*cfg.Routes) + } + if cfg.Hostname != "" { + args = append(args, "--hostname="+cfg.Hostname) + } + log.Printf("Running 'tailscale set'") + cmd := exec.CommandContext(ctx, "tailscale", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("tailscale set failed: %v", err) + } + return nil +}