2023-01-27 21:37:20 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2022-10-25 21:12:54 +01:00
//go:build linux
2022-12-16 22:09:46 +00:00
// The containerboot binary is a wrapper for starting tailscaled in a container.
// It handles reading the desired mode of operation out of environment
// variables, bringing up and authenticating Tailscale, and any other
// kubernetes-specific side jobs.
2022-10-25 21:12:54 +01:00
//
2022-12-16 22:09:46 +00:00
// As with most container things, configuration is passed through environment
// variables. All configuration is optional.
2022-10-25 21:12:54 +01:00
//
2023-01-03 23:17:44 +00:00
// - TS_AUTHKEY: the authkey to use for login.
2023-01-25 18:16:59 +00:00
// - TS_HOSTNAME: the hostname to request for the node.
2024-01-04 09:17:04 +00:00
// - TS_ROUTES: subnet routes to advertise. Explicitly setting it to an empty
// value will cause containerboot to stop acting as a subnet router for any
// previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
// in --accept-routes.
2022-11-16 18:04:07 +00:00
// - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
// destination.
2023-08-30 08:31:37 +01:00
// - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
2023-11-24 16:24:48 +00:00
// destination defined by an IP.
// - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
// destination defined by a MagicDNS name.
2022-11-16 18:04:07 +00:00
// - TS_TAILSCALED_EXTRA_ARGS: extra arguments to 'tailscaled'.
2023-10-16 02:41:28 +01:00
// - TS_EXTRA_ARGS: extra arguments to 'tailscale up'.
2022-11-16 18:04:07 +00:00
// - TS_USERSPACE: run with userspace networking (the default)
// instead of kernel networking.
// - TS_STATE_DIR: the directory in which to store tailscaled
// state. The data should persist across container
// restarts.
// - TS_ACCEPT_DNS: whether to use the tailnet's DNS configuration.
// - TS_KUBE_SECRET: the name of the Kubernetes secret in which to
// store tailscaled state.
// - TS_SOCKS5_SERVER: the address on which to listen for SOCKS5
// proxying into the tailnet.
// - TS_OUTBOUND_HTTP_PROXY_LISTEN: the address on which to listen
// for HTTP proxying into the tailnet.
// - TS_SOCKET: the path where the tailscaled LocalAPI socket should
// be created.
// - TS_AUTH_ONCE: if true, only attempt to log in if not already
2023-10-16 02:41:28 +01:00
// logged in. If false (the default, for backwards
// compatibility), forcibly log in every time the
// container starts.
2023-08-24 17:08:50 +01:00
// - TS_SERVE_CONFIG: if specified, is the file path where the ipn.ServeConfig is located.
// It will be applied once tailscaled is up and running. If the file contains
// ${TS_CERT_DOMAIN}, it will be replaced with the value of the available FQDN.
2023-08-25 21:26:04 +01:00
// It cannot be used in conjunction with TS_DEST_IP. The file is watched for changes,
// and will be re-applied when it changes.
2022-10-25 21:12:54 +01:00
//
2022-12-16 22:09:46 +00:00
// When running on Kubernetes, containerboot defaults to storing state in the
// "tailscale" kube secret. To store state on local disk instead, set
// TS_KUBE_SECRET="" and TS_STATE_DIR=/path/to/storage/dir. The state dir should
// be persistent storage.
//
2023-01-03 23:17:44 +00:00
// Additionally, if TS_AUTHKEY is not set and the TS_KUBE_SECRET contains an
2022-12-16 22:09:46 +00:00
// "authkey" field, that key is used as the tailscale authkey.
2022-10-25 21:12:54 +01:00
package main
import (
2023-08-24 17:08:50 +01:00
"bytes"
2022-10-25 21:12:54 +01:00
"context"
2023-08-24 17:08:50 +01:00
"encoding/json"
2022-10-25 21:12:54 +01:00
"errors"
"fmt"
"io/fs"
"log"
"net/netip"
"os"
"os/exec"
"os/signal"
2022-11-10 06:01:34 +00:00
"path/filepath"
2023-08-25 21:26:04 +01:00
"reflect"
2022-10-25 21:12:54 +01:00
"strconv"
"strings"
2023-11-16 19:23:18 +00:00
"sync"
2023-08-24 17:08:50 +01:00
"sync/atomic"
2022-10-25 21:12:54 +01:00
"syscall"
"time"
2023-08-24 17:08:50 +01:00
"github.com/fsnotify/fsnotify"
2022-10-25 21:12:54 +01:00
"golang.org/x/sys/unix"
"tailscale.com/client/tailscale"
2022-12-07 20:29:45 +00:00
"tailscale.com/ipn"
2023-11-24 16:24:48 +00:00
"tailscale.com/tailcfg"
2023-10-11 15:26:40 +01:00
"tailscale.com/types/logger"
2023-08-24 17:08:50 +01:00
"tailscale.com/types/ptr"
2022-12-07 20:29:45 +00:00
"tailscale.com/util/deephash"
2023-10-11 15:26:40 +01:00
"tailscale.com/util/linuxfw"
2022-10-25 21:12:54 +01:00
)
2023-10-11 15:26:40 +01:00
func newNetfilterRunner ( logf logger . Logf ) ( linuxfw . NetfilterRunner , error ) {
if defaultBool ( "TS_TEST_FAKE_NETFILTER" , false ) {
return linuxfw . NewFakeIPTablesRunner ( ) , nil
}
2023-12-04 17:08:56 +00:00
return linuxfw . New ( logf , "" )
2023-10-11 15:26:40 +01:00
}
2022-10-25 21:12:54 +01:00
func main ( ) {
log . SetPrefix ( "boot: " )
tailscale . I_Acknowledge_This_API_Is_Unstable = true
cfg := & settings {
2023-11-24 16:24:48 +00:00
AuthKey : defaultEnvs ( [ ] string { "TS_AUTHKEY" , "TS_AUTH_KEY" } , "" ) ,
Hostname : defaultEnv ( "TS_HOSTNAME" , "" ) ,
2024-01-04 09:17:04 +00:00
Routes : defaultEnvPointer ( "TS_ROUTES" ) ,
2023-11-24 16:24:48 +00:00
ServeConfigPath : defaultEnv ( "TS_SERVE_CONFIG" , "" ) ,
ProxyTo : defaultEnv ( "TS_DEST_IP" , "" ) ,
TailnetTargetIP : defaultEnv ( "TS_TAILNET_TARGET_IP" , "" ) ,
TailnetTargetFQDN : defaultEnv ( "TS_TAILNET_TARGET_FQDN" , "" ) ,
DaemonExtraArgs : defaultEnv ( "TS_TAILSCALED_EXTRA_ARGS" , "" ) ,
ExtraArgs : defaultEnv ( "TS_EXTRA_ARGS" , "" ) ,
InKubernetes : os . Getenv ( "KUBERNETES_SERVICE_HOST" ) != "" ,
UserspaceMode : defaultBool ( "TS_USERSPACE" , true ) ,
StateDir : defaultEnv ( "TS_STATE_DIR" , "" ) ,
AcceptDNS : defaultBool ( "TS_ACCEPT_DNS" , false ) ,
KubeSecret : defaultEnv ( "TS_KUBE_SECRET" , "tailscale" ) ,
SOCKSProxyAddr : defaultEnv ( "TS_SOCKS5_SERVER" , "" ) ,
HTTPProxyAddr : defaultEnv ( "TS_OUTBOUND_HTTP_PROXY_LISTEN" , "" ) ,
Socket : defaultEnv ( "TS_SOCKET" , "/tmp/tailscaled.sock" ) ,
AuthOnce : defaultBool ( "TS_AUTH_ONCE" , false ) ,
Root : defaultEnv ( "TS_TEST_ONLY_ROOT" , "/" ) ,
2022-10-25 21:12:54 +01:00
}
if cfg . ProxyTo != "" && cfg . UserspaceMode {
log . Fatal ( "TS_DEST_IP is not supported with TS_USERSPACE" )
}
2023-08-30 08:31:37 +01:00
if cfg . TailnetTargetIP != "" && cfg . UserspaceMode {
log . Fatal ( "TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE" )
2023-08-24 17:08:50 +01:00
}
2023-11-24 16:24:48 +00:00
if cfg . TailnetTargetFQDN != "" && cfg . UserspaceMode {
log . Fatal ( "TS_TAILNET_TARGET_FQDN is not supported with TS_USERSPACE" )
}
if cfg . TailnetTargetFQDN != "" && cfg . TailnetTargetIP != "" {
log . Fatal ( "Both TS_TAILNET_TARGET_IP and TS_TAILNET_FQDN cannot be set" )
}
2022-10-25 21:12:54 +01:00
if ! cfg . UserspaceMode {
2022-11-10 06:01:34 +00:00
if err := ensureTunFile ( cfg . Root ) ; err != nil {
2022-10-25 21:12:54 +01:00
log . Fatalf ( "Unable to create tuntap device file: %v" , err )
}
2024-01-04 09:17:04 +00:00
if cfg . ProxyTo != "" || cfg . Routes != nil || cfg . TailnetTargetIP != "" || cfg . TailnetTargetFQDN != "" {
2023-11-24 16:24:48 +00:00
if err := ensureIPForwarding ( cfg . Root , cfg . ProxyTo , cfg . TailnetTargetIP , cfg . TailnetTargetFQDN , cfg . Routes ) ; err != nil {
2022-11-07 23:34:08 +00:00
log . Printf ( "Failed to enable IP forwarding: %v" , err )
log . Printf ( "To run tailscale as a proxy or router container, IP forwarding must be enabled." )
if cfg . InKubernetes {
log . Fatalf ( "You can either set the sysctls as a privileged initContainer, or run the tailscale container with privileged=true." )
} else {
log . Fatalf ( "You can fix this by running the container with privileged=true, or the equivalent in your container runtime that permits access to sysctls." )
}
2022-10-25 21:12:54 +01:00
}
}
}
2022-11-10 06:01:34 +00:00
if cfg . InKubernetes {
initKube ( cfg . Root )
}
2022-10-25 21:12:54 +01:00
// Context is used for all setup stuff until we're in steady
// state, so that if something is hanging we eventually time out
// and crashloop the container.
2023-08-24 17:08:50 +01:00
bootCtx , cancel := context . WithTimeout ( context . Background ( ) , 60 * time . Second )
2022-10-25 21:12:54 +01:00
defer cancel ( )
2022-12-06 22:52:22 +00:00
if cfg . InKubernetes && cfg . KubeSecret != "" {
2023-08-24 17:08:50 +01:00
canPatch , err := kc . CheckSecretPermissions ( bootCtx , cfg . KubeSecret )
2022-12-07 01:03:53 +00:00
if err != nil {
2022-12-06 22:52:22 +00:00
log . Fatalf ( "Some Kubernetes permissions are missing, please check your RBAC configuration: %v" , err )
2022-10-25 21:12:54 +01:00
}
2022-12-07 01:03:53 +00:00
cfg . KubernetesCanPatch = canPatch
2022-12-06 22:52:22 +00:00
if cfg . AuthKey == "" {
2023-08-24 17:08:50 +01:00
key , err := findKeyInKubeSecret ( bootCtx , cfg . KubeSecret )
2022-12-06 22:52:22 +00:00
if err != nil {
log . Fatalf ( "Getting authkey from kube secret: %v" , err )
}
if key != "" {
2022-12-07 01:03:53 +00:00
// This behavior of pulling authkeys from kube secrets was added
// at the same time as the patch permission, so we can enforce
// that we must be able to patch out the authkey after
// authenticating if you want to use this feature. This avoids
// us having to deal with the case where we might leave behind
// an unnecessary reusable authkey in a secret, like a rake in
// the grass.
if ! cfg . KubernetesCanPatch {
log . Fatalf ( "authkey found in TS_KUBE_SECRET, but the pod doesn't have patch permissions on the secret to manage the authkey." )
}
2022-12-06 22:52:22 +00:00
log . Print ( "Using authkey found in kube secret" )
cfg . AuthKey = key
} else {
log . Print ( "No authkey found in kube secret and TS_AUTHKEY not provided, login will be interactive if needed." )
}
2022-10-25 21:12:54 +01:00
}
}
2023-11-16 19:23:18 +00:00
client , daemonProcess , err := startTailscaled ( bootCtx , cfg )
2022-10-25 21:12:54 +01:00
if err != nil {
log . Fatalf ( "failed to bring up tailscale: %v" , err )
}
2023-11-16 19:23:18 +00:00
killTailscaled := func ( ) {
if err := daemonProcess . Signal ( unix . SIGTERM ) ; err != nil {
log . Fatalf ( "error shutting tailscaled down: %v" , err )
}
}
defer killTailscaled ( )
2022-10-25 21:12:54 +01:00
2023-08-24 17:08:50 +01:00
w , err := client . WatchIPNBus ( bootCtx , ipn . NotifyInitialNetMap | ipn . NotifyInitialPrefs | ipn . NotifyInitialState )
2022-12-05 23:38:50 +00:00
if err != nil {
2022-12-07 20:29:45 +00:00
log . Fatalf ( "failed to watch tailscaled for updates: %v" , err )
2022-12-05 23:38:50 +00:00
}
2023-12-08 17:46:32 +00:00
// Now that we've started tailscaled, we can symlink the socket to the
// default location if needed.
const defaultTailscaledSocketPath = "/var/run/tailscale/tailscaled.sock"
if cfg . Socket != "" && cfg . Socket != defaultTailscaledSocketPath {
// If we were given a socket path, symlink it to the default location so
// that the CLI can find it without any extra flags.
// See #6849.
dir := filepath . Dir ( defaultTailscaledSocketPath )
err := os . MkdirAll ( dir , 0700 )
if err == nil {
err = syscall . Symlink ( cfg . Socket , defaultTailscaledSocketPath )
}
if err != nil {
log . Printf ( "[warning] failed to symlink socket: %v\n\tTo interact with the Tailscale CLI please use `tailscale --socket=%q`" , err , cfg . Socket )
}
}
2022-12-07 20:29:45 +00:00
// Because we're still shelling out to `tailscale up` to get access to its
// flag parser, we have to stop watching the IPN bus so that we can block on
// the subcommand without stalling anything. Then once it's done, we resume
// watching the bus.
//
// Depending on the requested mode of operation, this auth step happens at
// different points in containerboot's lifecycle, hence the helper function.
didLogin := false
authTailscale := func ( ) error {
if didLogin {
return nil
}
didLogin = true
w . Close ( )
2023-10-16 02:41:28 +01:00
if err := tailscaleUp ( bootCtx , cfg ) ; err != nil {
2022-12-07 20:29:45 +00:00
return fmt . Errorf ( "failed to auth tailscale: %v" , err )
2022-10-25 21:12:54 +01:00
}
2023-08-24 17:08:50 +01:00
w , err = client . WatchIPNBus ( bootCtx , ipn . NotifyInitialNetMap | ipn . NotifyInitialState )
2022-12-07 20:29:45 +00:00
if err != nil {
return fmt . Errorf ( "rewatching tailscaled for updates after auth: %v" , err )
}
return nil
2022-10-25 21:12:54 +01:00
}
2022-12-07 20:29:45 +00:00
if ! cfg . AuthOnce {
if err := authTailscale ( ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
2022-10-25 21:12:54 +01:00
}
2022-12-07 20:29:45 +00:00
}
authLoop :
for {
n , err := w . Next ( )
if err != nil {
log . Fatalf ( "failed to read from tailscaled: %v" , err )
}
if n . State != nil {
switch * n . State {
case ipn . NeedsLogin :
if err := authTailscale ( ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
}
case ipn . NeedsMachineAuth :
log . Printf ( "machine authorization required, please visit the admin panel" )
case ipn . Running :
// Technically, all we want is to keep monitoring the bus for
// netmap updates. However, in order to make the container crash
// if tailscale doesn't initially come up, the watch has a
// startup deadline on it. So, we have to break out of this
// watch loop, cancel the watch, and watch again with no
// deadline to continue monitoring for changes.
break authLoop
default :
log . Printf ( "tailscaled in state %q, waiting" , * n . State )
2022-10-25 21:12:54 +01:00
}
}
}
2022-12-07 20:29:45 +00:00
w . Close ( )
2023-11-16 19:23:18 +00:00
ctx , cancel := contextWithExitSignalWatch ( )
2023-08-24 17:08:50 +01:00
defer cancel ( )
2023-10-16 02:41:28 +01:00
if cfg . AuthOnce {
// Now that we are authenticated, we can set/reset any of the
// settings that we need to.
if err := tailscaleSet ( ctx , cfg ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
}
2023-08-24 17:08:50 +01:00
}
2023-09-29 17:08:49 +01:00
if cfg . ServeConfigPath != "" {
// Remove any serve config that may have been set by a previous run of
// containerboot, but only if we're providing a new one.
if err := client . SetServeConfig ( ctx , new ( ipn . ServeConfig ) ) ; err != nil {
log . Fatalf ( "failed to unset serve config: %v" , err )
}
2023-08-24 17:08:50 +01:00
}
2022-12-07 20:29:45 +00:00
if cfg . InKubernetes && cfg . KubeSecret != "" && cfg . KubernetesCanPatch && cfg . AuthOnce {
// We were told to only auth once, so any secret-bound
// authkey is no longer needed. We don't strictly need to
// wipe it, but it's good hygiene.
log . Printf ( "Deleting authkey from kube secret" )
if err := deleteAuthKey ( ctx , cfg . KubeSecret ) ; err != nil {
log . Fatalf ( "deleting authkey from kube secret: %v" , err )
2022-10-25 21:12:54 +01:00
}
2022-12-07 20:29:45 +00:00
}
2023-08-24 17:08:50 +01:00
w , err = client . WatchIPNBus ( ctx , ipn . NotifyInitialNetMap | ipn . NotifyInitialState )
2022-12-07 20:29:45 +00:00
if err != nil {
log . Fatalf ( "rewatching tailscaled for updates after auth: %v" , err )
}
var (
2023-11-24 16:24:48 +00:00
wantProxy = cfg . ProxyTo != "" || cfg . TailnetTargetIP != "" || cfg . TailnetTargetFQDN != ""
2022-12-07 20:29:45 +00:00
wantDeviceInfo = cfg . InKubernetes && cfg . KubeSecret != "" && cfg . KubernetesCanPatch
startupTasksDone = false
currentIPs deephash . Sum // tailscale IPs assigned to device
currentDeviceInfo deephash . Sum // device ID and fqdn
2023-08-24 17:08:50 +01:00
2023-11-24 16:24:48 +00:00
currentEgressIPs deephash . Sum
2023-08-24 17:08:50 +01:00
certDomain = new ( atomic . Pointer [ string ] )
certDomainChanged = make ( chan bool , 1 )
2022-12-07 20:29:45 +00:00
)
2023-08-24 17:08:50 +01:00
if cfg . ServeConfigPath != "" {
go watchServeConfigChanges ( ctx , cfg . ServeConfigPath , certDomainChanged , certDomain , client )
}
2023-10-11 15:26:40 +01:00
var nfr linuxfw . NetfilterRunner
if wantProxy {
nfr , err = newNetfilterRunner ( log . Printf )
if err != nil {
log . Fatalf ( "error creating new netfilter runner: %v" , err )
}
}
2023-11-16 19:23:18 +00:00
notifyChan := make ( chan ipn . Notify )
errChan := make ( chan error )
go func ( ) {
for {
n , err := w . Next ( )
if err != nil {
errChan <- err
break
} else {
notifyChan <- n
}
}
} ( )
var wg sync . WaitGroup
runLoop :
2022-12-07 20:29:45 +00:00
for {
2023-11-16 19:23:18 +00:00
select {
case <- ctx . Done ( ) :
// Although killTailscaled() is deferred earlier, if we
// have started the reaper defined below, we need to
// kill tailscaled and let reaper clean up child
// processes.
killTailscaled ( )
break runLoop
case err := <- errChan :
2022-12-07 20:29:45 +00:00
log . Fatalf ( "failed to read from tailscaled: %v" , err )
2023-11-16 19:23:18 +00:00
case n := <- notifyChan :
if n . State != nil && * n . State != ipn . Running {
// Something's gone wrong and we've left the authenticated state.
// Our container image never recovered gracefully from this, and the
// control flow required to make it work now is hard. So, just crash
// the container and rely on the container runtime to restart us,
// whereupon we'll go through initial auth again.
log . Fatalf ( "tailscaled left running state (now in state %q), exiting" , * n . State )
2022-12-07 20:29:45 +00:00
}
2023-11-16 19:23:18 +00:00
if n . NetMap != nil {
addrs := n . NetMap . SelfNode . Addresses ( ) . AsSlice ( )
newCurrentIPs := deephash . Hash ( & addrs )
ipsHaveChanged := newCurrentIPs != currentIPs
2023-11-24 16:24:48 +00:00
if cfg . TailnetTargetFQDN != "" {
var (
egressAddrs [ ] netip . Prefix
newCurentEgressIPs deephash . Sum
egressIPsHaveChanged bool
node tailcfg . NodeView
nodeFound bool
)
for _ , n := range n . NetMap . Peers {
if strings . EqualFold ( n . Name ( ) , cfg . TailnetTargetFQDN ) {
node = n
nodeFound = true
break
}
}
if ! nodeFound {
log . Printf ( "Tailscale node %q not found; it either does not exist, or not reachable because of ACLs" , cfg . TailnetTargetFQDN )
break
}
egressAddrs = node . Addresses ( ) . AsSlice ( )
newCurentEgressIPs = deephash . Hash ( & egressAddrs )
egressIPsHaveChanged = newCurentEgressIPs != currentEgressIPs
if egressIPsHaveChanged && len ( egressAddrs ) > 0 {
for _ , egressAddr := range egressAddrs {
ea := egressAddr . Addr ( )
// TODO (irbekrm): make it work for IPv6 too.
if ea . Is6 ( ) {
log . Println ( "Not installing egress forwarding rules for IPv6 as this is currently not supported" )
continue
}
log . Printf ( "Installing forwarding rules for destination %v" , ea . String ( ) )
if err := installEgressForwardingRule ( ctx , ea . String ( ) , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing egress proxy rules for destination %s: %v" , ea . String ( ) , err )
}
}
}
currentEgressIPs = newCurentEgressIPs
}
2023-11-16 19:23:18 +00:00
if cfg . ProxyTo != "" && len ( addrs ) > 0 && ipsHaveChanged {
log . Printf ( "Installing proxy rules" )
if err := installIngressForwardingRule ( ctx , cfg . ProxyTo , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing ingress proxy rules: %v" , err )
2023-08-24 17:08:50 +01:00
}
}
2023-11-16 19:23:18 +00:00
if cfg . ServeConfigPath != "" && len ( n . NetMap . DNS . CertDomains ) > 0 {
cd := n . NetMap . DNS . CertDomains [ 0 ]
prev := certDomain . Swap ( ptr . To ( cd ) )
if prev == nil || * prev != cd {
select {
case certDomainChanged <- true :
default :
}
}
2023-08-30 08:31:37 +01:00
}
2023-11-16 19:23:18 +00:00
if cfg . TailnetTargetIP != "" && ipsHaveChanged && len ( addrs ) > 0 {
2023-11-24 16:24:48 +00:00
log . Printf ( "Installing forwarding rules for destination %v" , cfg . TailnetTargetIP )
2023-11-16 19:23:18 +00:00
if err := installEgressForwardingRule ( ctx , cfg . TailnetTargetIP , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing egress proxy rules: %v" , err )
}
}
currentIPs = newCurrentIPs
2023-08-30 08:31:37 +01:00
2023-11-16 19:23:18 +00:00
deviceInfo := [ ] any { n . NetMap . SelfNode . StableID ( ) , n . NetMap . SelfNode . Name ( ) }
if cfg . InKubernetes && cfg . KubernetesCanPatch && cfg . KubeSecret != "" && deephash . Update ( & currentDeviceInfo , & deviceInfo ) {
if err := storeDeviceInfo ( ctx , cfg . KubeSecret , n . NetMap . SelfNode . StableID ( ) , n . NetMap . SelfNode . Name ( ) , n . NetMap . SelfNode . Addresses ( ) . AsSlice ( ) ) ; err != nil {
log . Fatalf ( "storing device ID in kube secret: %v" , err )
}
2022-12-07 20:29:45 +00:00
}
}
2023-11-16 19:23:18 +00:00
if ! startupTasksDone {
if ( ! wantProxy || currentIPs != deephash . Sum { } ) && ( ! wantDeviceInfo || currentDeviceInfo != deephash . Sum { } ) {
// This log message is used in tests to detect when all
// post-auth configuration is done.
log . Println ( "Startup complete, waiting for shutdown signal" )
startupTasksDone = true
2023-11-24 16:24:48 +00:00
// Reap all processes, since we are PID1 and need to collect zombies. We can
// only start doing this once we've stopped shelling out to things
// `tailscale up`, otherwise this goroutine can reap the CLI subprocesses
// and wedge bringup.
2023-11-16 19:23:18 +00:00
reaper := func ( ) {
defer wg . Done ( )
for {
var status unix . WaitStatus
pid , err := unix . Wait4 ( - 1 , & status , 0 , nil )
if errors . Is ( err , unix . EINTR ) {
continue
}
if err != nil {
log . Fatalf ( "Waiting for exited processes: %v" , err )
}
if pid == daemonProcess . Pid {
log . Printf ( "Tailscaled exited" )
os . Exit ( 0 )
}
2022-12-07 20:29:45 +00:00
}
2023-11-16 19:23:18 +00:00
2022-12-07 20:29:45 +00:00
}
2023-11-16 19:23:18 +00:00
wg . Add ( 1 )
go reaper ( )
}
2022-12-07 20:29:45 +00:00
}
2022-10-25 21:12:54 +01:00
}
}
2023-11-16 19:23:18 +00:00
wg . Wait ( )
2022-10-25 21:12:54 +01:00
}
2023-08-24 17:08:50 +01:00
// watchServeConfigChanges watches path for changes, and when it sees one, reads
// the serve config from it, replacing ${TS_CERT_DOMAIN} with certDomain, and
// applies it to lc. It exits when ctx is canceled. cdChanged is a channel that
// is written to when the certDomain changes, causing the serve config to be
// re-read and applied.
func watchServeConfigChanges ( ctx context . Context , path string , cdChanged <- chan bool , certDomainAtomic * atomic . Pointer [ string ] , lc * tailscale . LocalClient ) {
if certDomainAtomic == nil {
panic ( "cd must not be nil" )
}
2023-08-25 21:26:04 +01:00
var tickChan <- chan time . Time
2023-10-11 22:24:05 +01:00
var eventChan <- chan fsnotify . Event
if w , err := fsnotify . NewWatcher ( ) ; err != nil {
2023-08-25 21:26:04 +01:00
log . Printf ( "failed to create fsnotify watcher, timer-only mode: %v" , err )
ticker := time . NewTicker ( 5 * time . Second )
defer ticker . Stop ( )
tickChan = ticker . C
} else {
defer w . Close ( )
2023-10-11 22:24:05 +01:00
if err := w . Add ( filepath . Dir ( path ) ) ; err != nil {
log . Fatalf ( "failed to add fsnotify watch: %v" , err )
}
eventChan = w . Events
2023-08-24 17:08:50 +01:00
}
2023-08-25 21:26:04 +01:00
2023-08-24 17:08:50 +01:00
var certDomain string
2023-08-25 21:26:04 +01:00
var prevServeConfig * ipn . ServeConfig
2023-08-24 17:08:50 +01:00
for {
select {
case <- ctx . Done ( ) :
return
case <- cdChanged :
certDomain = * certDomainAtomic . Load ( )
2023-08-25 21:26:04 +01:00
case <- tickChan :
2023-10-11 22:24:05 +01:00
case <- eventChan :
2023-08-25 21:26:04 +01:00
// We can't do any reasonable filtering on the event because of how
// k8s handles these mounts. So just re-read the file and apply it
// if it's changed.
2023-08-24 17:08:50 +01:00
}
if certDomain == "" {
continue
}
sc , err := readServeConfig ( path , certDomain )
if err != nil {
log . Fatalf ( "failed to read serve config: %v" , err )
}
2023-08-25 21:26:04 +01:00
if prevServeConfig != nil && reflect . DeepEqual ( sc , prevServeConfig ) {
continue
}
log . Printf ( "Applying serve config" )
2023-08-24 17:08:50 +01:00
if err := lc . SetServeConfig ( ctx , sc ) ; err != nil {
log . Fatalf ( "failed to set serve config: %v" , err )
}
2023-08-25 21:26:04 +01:00
prevServeConfig = sc
2023-08-24 17:08:50 +01:00
}
}
// readServeConfig loads the JSON-encoded ipn.ServeConfig stored at path. Every
// occurrence of the literal ${TS_CERT_DOMAIN} in the file is substituted with
// certDomain before the JSON is decoded.
//
// An empty path yields a nil config and a nil error. Read and decode failures
// are returned unchanged.
func readServeConfig(path, certDomain string) (*ipn.ServeConfig, error) {
	if path == "" {
		return nil, nil
	}
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	expanded := bytes.ReplaceAll(raw, []byte("${TS_CERT_DOMAIN}"), []byte(certDomain))
	sc := new(ipn.ServeConfig)
	if err := json.Unmarshal(expanded, sc); err != nil {
		return nil, err
	}
	return sc, nil
}
2023-11-16 19:23:18 +00:00
func startTailscaled ( ctx context . Context , cfg * settings ) ( * tailscale . LocalClient , * os . Process , error ) {
2022-10-25 21:12:54 +01:00
args := tailscaledArgs ( cfg )
// tailscaled runs without context, since it needs to persist
// beyond the startup timeout in ctx.
cmd := exec . Command ( "tailscaled" , args ... )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
cmd . SysProcAttr = & syscall . SysProcAttr {
Setpgid : true ,
}
log . Printf ( "Starting tailscaled" )
if err := cmd . Start ( ) ; err != nil {
2023-11-16 19:23:18 +00:00
return nil , nil , fmt . Errorf ( "starting tailscaled failed: %v" , err )
2022-10-25 21:12:54 +01:00
}
2022-12-05 23:38:50 +00:00
// Wait for the socket file to appear, otherwise API ops will racily fail.
2022-10-25 21:12:54 +01:00
log . Printf ( "Waiting for tailscaled socket" )
for {
if ctx . Err ( ) != nil {
log . Fatalf ( "Timed out waiting for tailscaled socket" )
}
_ , err := os . Stat ( cfg . Socket )
if errors . Is ( err , fs . ErrNotExist ) {
time . Sleep ( 100 * time . Millisecond )
continue
} else if err != nil {
log . Fatalf ( "Waiting for tailscaled socket: %v" , err )
}
break
}
2022-12-05 23:38:50 +00:00
tsClient := & tailscale . LocalClient {
Socket : cfg . Socket ,
UseSocketOnly : true ,
}
2023-11-16 19:23:18 +00:00
return tsClient , cmd . Process , nil
2022-12-05 23:38:50 +00:00
}
2022-10-25 21:12:54 +01:00
// tailscaledArgs uses cfg to construct the argv for tailscaled.
func tailscaledArgs ( cfg * settings ) [ ] string {
args := [ ] string { "--socket=" + cfg . Socket }
switch {
case cfg . InKubernetes && cfg . KubeSecret != "" :
2023-02-01 14:25:48 +00:00
args = append ( args , "--state=kube:" + cfg . KubeSecret )
if cfg . StateDir == "" {
cfg . StateDir = "/tmp"
}
fallthrough
2022-10-25 21:12:54 +01:00
case cfg . StateDir != "" :
2022-12-06 13:31:50 +00:00
args = append ( args , "--statedir=" + cfg . StateDir )
2022-10-25 21:12:54 +01:00
default :
args = append ( args , "--state=mem:" , "--statedir=/tmp" )
}
if cfg . UserspaceMode {
args = append ( args , "--tun=userspace-networking" )
2022-11-10 06:01:34 +00:00
} else if err := ensureTunFile ( cfg . Root ) ; err != nil {
2022-10-25 21:12:54 +01:00
log . Fatalf ( "ensuring that /dev/net/tun exists: %v" , err )
}
if cfg . SOCKSProxyAddr != "" {
args = append ( args , "--socks5-server=" + cfg . SOCKSProxyAddr )
}
if cfg . HTTPProxyAddr != "" {
args = append ( args , "--outbound-http-proxy-listen=" + cfg . HTTPProxyAddr )
}
if cfg . DaemonExtraArgs != "" {
args = append ( args , strings . Fields ( cfg . DaemonExtraArgs ) ... )
}
return args
}
2023-10-16 02:41:28 +01:00
// tailscaleUp uses cfg to run 'tailscale up' everytime containerboot starts, or
// if TS_AUTH_ONCE is set, only the first time containerboot starts.
func tailscaleUp ( ctx context . Context , cfg * settings ) error {
args := [ ] string { "--socket=" + cfg . Socket , "up" }
if cfg . AcceptDNS {
args = append ( args , "--accept-dns=true" )
} else {
args = append ( args , "--accept-dns=false" )
}
2023-08-24 00:08:24 +01:00
if cfg . AuthKey != "" {
args = append ( args , "--authkey=" + cfg . AuthKey )
}
2024-01-04 09:17:04 +00:00
// --advertise-routes can be passed an empty string to configure a
// device (that might have previously advertised subnet routes) to not
// advertise any routes. Respect an empty string passed by a user and
// use it to explicitly unset the routes.
if cfg . Routes != nil {
args = append ( args , "--advertise-routes=" + * cfg . Routes )
2023-10-16 02:41:28 +01:00
}
if cfg . Hostname != "" {
args = append ( args , "--hostname=" + cfg . Hostname )
}
2023-08-24 00:08:24 +01:00
if cfg . ExtraArgs != "" {
args = append ( args , strings . Fields ( cfg . ExtraArgs ) ... )
}
2023-10-16 02:41:28 +01:00
log . Printf ( "Running 'tailscale up'" )
2023-08-24 00:08:24 +01:00
cmd := exec . CommandContext ( ctx , "tailscale" , args ... )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
if err := cmd . Run ( ) ; err != nil {
2023-10-16 02:41:28 +01:00
return fmt . Errorf ( "tailscale up failed: %v" , err )
2023-08-24 00:08:24 +01:00
}
return nil
}
// tailscaleSet uses cfg to run 'tailscale set' to set any known configuration
// options that are passed in via environment variables. This is run after the
2023-10-16 02:41:28 +01:00
// node is in Running state and only if TS_AUTH_ONCE is set.
2023-08-24 00:08:24 +01:00
func tailscaleSet ( ctx context . Context , cfg * settings ) error {
args := [ ] string { "--socket=" + cfg . Socket , "set" }
2022-10-25 21:12:54 +01:00
if cfg . AcceptDNS {
args = append ( args , "--accept-dns=true" )
} else {
args = append ( args , "--accept-dns=false" )
}
2024-01-04 09:17:04 +00:00
// --advertise-routes can be passed an empty string to configure a
// device (that might have previously advertised subnet routes) to not
// advertise any routes. Respect an empty string passed by a user and
// use it to explicitly unset the routes.
if cfg . Routes != nil {
args = append ( args , "--advertise-routes=" + * cfg . Routes )
2022-10-25 21:12:54 +01:00
}
2023-01-25 18:16:59 +00:00
if cfg . Hostname != "" {
args = append ( args , "--hostname=" + cfg . Hostname )
}
2023-08-24 00:08:24 +01:00
log . Printf ( "Running 'tailscale set'" )
2022-10-25 21:12:54 +01:00
cmd := exec . CommandContext ( ctx , "tailscale" , args ... )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
if err := cmd . Run ( ) ; err != nil {
2023-08-24 00:08:24 +01:00
return fmt . Errorf ( "tailscale set failed: %v" , err )
2022-10-25 21:12:54 +01:00
}
return nil
}
// ensureTunFile checks that /dev/net/tun exists, creating it if
// missing.
2022-11-10 06:01:34 +00:00
func ensureTunFile ( root string ) error {
2022-10-25 21:12:54 +01:00
// Verify that /dev/net/tun exists, in some container envs it
// needs to be mknod-ed.
2022-11-10 06:01:34 +00:00
if _ , err := os . Stat ( filepath . Join ( root , "dev/net" ) ) ; errors . Is ( err , fs . ErrNotExist ) {
if err := os . MkdirAll ( filepath . Join ( root , "dev/net" ) , 0755 ) ; err != nil {
2022-10-25 21:12:54 +01:00
return err
}
}
2022-11-10 06:01:34 +00:00
if _ , err := os . Stat ( filepath . Join ( root , "dev/net/tun" ) ) ; errors . Is ( err , fs . ErrNotExist ) {
2022-10-25 21:12:54 +01:00
dev := unix . Mkdev ( 10 , 200 ) // tuntap major and minor
2022-11-10 06:01:34 +00:00
if err := unix . Mknod ( filepath . Join ( root , "dev/net/tun" ) , 0600 | unix . S_IFCHR , int ( dev ) ) ; err != nil {
2022-10-25 21:12:54 +01:00
return err
}
}
return nil
}
// ensureIPForwarding enables IPv4 and/or IPv6 forwarding for the container by
// writing "1" to the relevant sysctl files beneath root, but only for the
// address families that the requested configuration actually needs.
//
//   - clusterProxyTarget and tailnetTargetiP, when non-empty, must be IP
//     addresses; each one enables forwarding for its own address family.
//   - tailnetTargetFQDN, when non-empty, enables IPv4 forwarding.
//   - routes, when non-nil and non-empty, is a comma-separated list of CIDR
//     prefixes; each one enables forwarding for its own address family. A
//     pointer to an empty string means "advertise no routes" and therefore
//     requires no forwarding.
//
// An error is returned if any address or prefix fails to parse, or if a
// required sysctl file cannot be read or updated.
func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
	var v4Forwarding, v6Forwarding bool
	needForwardingFor := func(addr netip.Addr) {
		if addr.Is4() {
			v4Forwarding = true
		} else {
			v6Forwarding = true
		}
	}

	if clusterProxyTarget != "" {
		proxyIP, err := netip.ParseAddr(clusterProxyTarget)
		if err != nil {
			return fmt.Errorf("invalid cluster destination IP: %w", err)
		}
		needForwardingFor(proxyIP)
	}
	if tailnetTargetiP != "" {
		proxyIP, err := netip.ParseAddr(tailnetTargetiP)
		if err != nil {
			return fmt.Errorf("invalid tailnet destination IP: %w", err)
		}
		needForwardingFor(proxyIP)
	}
	// Currently we only proxy traffic to the IPv4 address of the tailnet
	// target.
	if tailnetTargetFQDN != "" {
		v4Forwarding = true
	}
	// An explicitly empty routes value is valid (it unsets previously
	// advertised routes). strings.Split("", ",") would yield a single empty
	// element that fails to parse as a prefix, so skip parsing in that case.
	if routes != nil && *routes != "" {
		for _, route := range strings.Split(*routes, ",") {
			cidr, err := netip.ParsePrefix(route)
			if err != nil {
				return fmt.Errorf("invalid subnet route: %w", err)
			}
			needForwardingFor(cidr.Addr())
		}
	}

	var paths []string
	if v4Forwarding {
		paths = append(paths, filepath.Join(root, "proc/sys/net/ipv4/ip_forward"))
	}
	if v6Forwarding {
		paths = append(paths, filepath.Join(root, "proc/sys/net/ipv6/conf/all/forwarding"))
	}

	// In some common configurations (e.g. default docker,
	// kubernetes), the container environment denies write access to
	// most sysctls, including IP forwarding controls. Check the
	// sysctl values before trying to change them, so that we
	// gracefully do nothing if the container's already been set up
	// properly by e.g. a k8s initContainer.
	for _, path := range paths {
		bs, err := os.ReadFile(path)
		if err != nil {
			return fmt.Errorf("reading %q: %w", path, err)
		}
		if strings.TrimSpace(string(bs)) == "1" {
			continue
		}
		if err := os.WriteFile(path, []byte("1"), 0644); err != nil {
			return fmt.Errorf("enabling %q: %w", path, err)
		}
	}
	return nil
}
2023-10-11 15:26:40 +01:00
func installEgressForwardingRule ( ctx context . Context , dstStr string , tsIPs [ ] netip . Prefix , nfr linuxfw . NetfilterRunner ) error {
2023-08-30 08:31:37 +01:00
dst , err := netip . ParseAddr ( dstStr )
if err != nil {
return err
}
2023-10-11 15:26:40 +01:00
var local netip . Addr
2023-08-30 08:31:37 +01:00
for _ , pfx := range tsIPs {
if ! pfx . IsSingleIP ( ) {
continue
}
if pfx . Addr ( ) . Is4 ( ) != dst . Is4 ( ) {
continue
}
2023-10-11 15:26:40 +01:00
local = pfx . Addr ( )
2023-08-30 08:31:37 +01:00
break
}
2023-10-11 15:26:40 +01:00
if ! local . IsValid ( ) {
2023-08-30 08:31:37 +01:00
return fmt . Errorf ( "no tailscale IP matching family of %s found in %v" , dstStr , tsIPs )
}
2023-10-11 15:26:40 +01:00
if err := nfr . DNATNonTailscaleTraffic ( "tailscale0" , dst ) ; err != nil {
return fmt . Errorf ( "installing egress proxy rules: %w" , err )
}
if err := nfr . AddSNATRuleForDst ( local , dst ) ; err != nil {
return fmt . Errorf ( "installing egress proxy rules: %w" , err )
}
if err := nfr . ClampMSSToPMTU ( "tailscale0" , dst ) ; err != nil {
return fmt . Errorf ( "installing egress proxy rules: %w" , err )
2023-09-20 18:59:13 +01:00
}
2023-08-30 08:31:37 +01:00
return nil
}
2023-10-11 15:26:40 +01:00
func installIngressForwardingRule ( ctx context . Context , dstStr string , tsIPs [ ] netip . Prefix , nfr linuxfw . NetfilterRunner ) error {
2022-10-25 21:12:54 +01:00
dst , err := netip . ParseAddr ( dstStr )
if err != nil {
return err
}
2023-10-11 15:26:40 +01:00
var local netip . Addr
2022-12-07 20:29:45 +00:00
for _ , pfx := range tsIPs {
if ! pfx . IsSingleIP ( ) {
continue
}
if pfx . Addr ( ) . Is4 ( ) != dst . Is4 ( ) {
2022-10-25 21:12:54 +01:00
continue
}
2023-10-11 15:26:40 +01:00
local = pfx . Addr ( )
2022-10-25 21:12:54 +01:00
break
}
2023-10-11 15:26:40 +01:00
if ! local . IsValid ( ) {
2022-10-25 21:12:54 +01:00
return fmt . Errorf ( "no tailscale IP matching family of %s found in %v" , dstStr , tsIPs )
}
2023-10-11 15:26:40 +01:00
if err := nfr . AddDNATRule ( local , dst ) ; err != nil {
return fmt . Errorf ( "installing ingress proxy rules: %w" , err )
2022-10-25 21:12:54 +01:00
}
2023-10-11 15:26:40 +01:00
if err := nfr . ClampMSSToPMTU ( "tailscale0" , dst ) ; err != nil {
return fmt . Errorf ( "installing ingress proxy rules: %w" , err )
2023-09-20 18:59:13 +01:00
}
2022-10-25 21:12:54 +01:00
return nil
}
// settings collects every configuration value containerboot works
// with.
type settings struct {
	AuthKey  string
	Hostname string
	// Routes is a pointer so that "not set" (nil) can be told apart
	// from "set to the empty string".
	// NOTE(review): presumably a comma-separated list of CIDR prefixes
	// sourced from TS_ROUTES; confirm against the code that fills it in.
	Routes *string

	// ProxyTo is the destination IP that all incoming Tailscale
	// traffic is proxied to. No proxying happens when it is empty.
	// Typically this is a locally reachable IP.
	ProxyTo string
	// TailnetTargetIP is the destination IP that all incoming
	// non-Tailscale traffic is proxied to. Typically this is a
	// Tailscale IP.
	TailnetTargetIP string
	// TailnetTargetFQDN is a MagicDNS name that all incoming
	// non-Tailscale traffic is proxied to. It must be a full tailnet
	// node FQDN.
	TailnetTargetFQDN string

	ServeConfigPath    string
	DaemonExtraArgs    string
	ExtraArgs          string
	InKubernetes       bool
	UserspaceMode      bool
	StateDir           string
	AcceptDNS          bool
	KubeSecret         string
	SOCKSProxyAddr     string
	HTTPProxyAddr      string
	Socket             string
	AuthOnce           bool
	Root               string
	KubernetesCanPatch bool
}
// defaultEnv looks up the environment variable name and returns its
// value when the variable is set (even if set to the empty string).
// When it is not set at all, defVal is returned instead.
func defaultEnv(name, defVal string) string {
	val, found := os.LookupEnv(name)
	if !found {
		return defVal
	}
	return val
}
2024-01-04 09:17:04 +00:00
// defaultEnvPointer reports the value of the environment variable name
// as a *string: a pointer to the value when the variable is set, nil
// when it is not. This lets callers tell a variable that is set to the
// empty string apart from one that is not set at all.
func defaultEnvPointer(name string) *string {
	val, found := os.LookupEnv(name)
	if !found {
		return nil
	}
	return &val
}
2023-01-03 23:17:44 +00:00
// defaultEnvs walks names in order and returns the value of the first
// environment variable that is set (even if set to the empty string).
// If none of them is set, defVal is returned.
func defaultEnvs(names []string, defVal string) string {
	for i := 0; i < len(names); i++ {
		val, found := os.LookupEnv(names[i])
		if !found {
			continue
		}
		return val
	}
	return defVal
}
2022-10-25 21:12:54 +01:00
// defaultBool interprets the environment variable name as a boolean
// using strconv.ParseBool. If the variable is unset, empty, or does not
// parse as a bool, defVal is returned.
func defaultBool(name string, defVal bool) bool {
	if parsed, err := strconv.ParseBool(os.Getenv(name)); err == nil {
		return parsed
	}
	return defVal
}
2023-11-16 19:23:18 +00:00
// contextWithExitSignalWatch watches for SIGTERM/SIGINT signals. It
// returns a context that gets cancelled when one of those signals is
// received, and a stop function that ends the watch and frees the
// associated resources.
//
// The stop function unregisters the signal handling and cancels the
// returned context. It never blocks and is safe to call more than once,
// including after a signal has already been received, so it can be used
// with defer.
func contextWithExitSignalWatch() (context.Context, func()) {
	// signal.NotifyContext handles the goroutine, the signal
	// registration and its teardown, so there is no hand-rolled channel
	// handshake that could block the caller once the watcher has exited.
	return signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
}