1556 lines
44 KiB
Go
1556 lines
44 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package router
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/netip"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
"sync/atomic"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/coreos/go-iptables/iptables"
|
|
"github.com/tailscale/netlink"
|
|
"go4.org/netipx"
|
|
"golang.org/x/sys/unix"
|
|
"golang.org/x/time/rate"
|
|
"golang.zx2c4.com/wireguard/tun"
|
|
"tailscale.com/envknob"
|
|
"tailscale.com/net/tsaddr"
|
|
"tailscale.com/types/logger"
|
|
"tailscale.com/types/preftype"
|
|
"tailscale.com/util/multierr"
|
|
"tailscale.com/version/distro"
|
|
"tailscale.com/wgengine/monitor"
|
|
)
|
|
|
|
const (
|
|
netfilterOff = preftype.NetfilterOff
|
|
netfilterNoDivert = preftype.NetfilterNoDivert
|
|
netfilterOn = preftype.NetfilterOn
|
|
)
|
|
|
|
// The following bits are added to packet marks for Tailscale use.
|
|
//
|
|
// We tried to pick bits sufficiently out of the way that it's
|
|
// unlikely to collide with existing uses. We have 4 bytes of mark
|
|
// bits to play with. We leave the lower byte alone on the assumption
|
|
// that sysadmins would use those. Kubernetes uses a few bits in the
|
|
// second byte, so we steer clear of that too.
|
|
//
|
|
// Empirically, most of the documentation on packet marks on the
|
|
// internet gives the impression that the marks are 16 bits
|
|
// wide. Based on this, we theorize that the upper two bytes are
|
|
// relatively unused in the wild, and so we consume bits starting at
|
|
// the 17th.
|
|
//
|
|
// The constants are in the iptables/iproute2 string format for
|
|
// matching and setting the bits, so they can be directly embedded in
|
|
// commands.
|
|
const (
|
|
// Packet is from Tailscale and to a subnet route destination, so
|
|
// is allowed to be routed through this machine.
|
|
tailscaleSubnetRouteMark = "0x40000"
|
|
|
|
// Packet was originated by tailscaled itself, and must not be
|
|
// routed over the Tailscale network.
|
|
//
|
|
// Keep this in sync with tailscaleBypassMark in
|
|
// net/netns/netns_linux.go.
|
|
tailscaleBypassMark = "0x80000"
|
|
tailscaleBypassMarkNum = 0x80000
|
|
)
|
|
|
|
// netfilterRunner abstracts helpers to run netfilter commands. It
|
|
// exists purely to swap out go-iptables for a fake implementation in
|
|
// tests.
|
|
type netfilterRunner interface {
|
|
Insert(table, chain string, pos int, args ...string) error
|
|
Append(table, chain string, args ...string) error
|
|
Exists(table, chain string, args ...string) (bool, error)
|
|
Delete(table, chain string, args ...string) error
|
|
ClearChain(table, chain string) error
|
|
NewChain(table, chain string) error
|
|
DeleteChain(table, chain string) error
|
|
}
|
|
|
|
type linuxRouter struct {
|
|
closed atomic.Bool
|
|
logf func(fmt string, args ...any)
|
|
tunname string
|
|
linkMon *monitor.Mon
|
|
unregLinkMon func()
|
|
addrs map[netip.Prefix]bool
|
|
routes map[netip.Prefix]bool
|
|
localRoutes map[netip.Prefix]bool
|
|
snatSubnetRoutes bool
|
|
netfilterMode preftype.NetfilterMode
|
|
|
|
// ruleRestorePending is whether a timer has been started to
|
|
// restore deleted ip rules.
|
|
ruleRestorePending atomic.Bool
|
|
ipRuleFixLimiter *rate.Limiter
|
|
|
|
// Various feature checks for the network stack.
|
|
ipRuleAvailable bool // whether kernel was built with IP_MULTIPLE_TABLES
|
|
v6Available bool
|
|
v6NATAvailable bool
|
|
|
|
ipt4 netfilterRunner
|
|
ipt6 netfilterRunner
|
|
cmd commandRunner
|
|
}
|
|
|
|
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, linkMon *monitor.Mon) (Router, error) {
|
|
tunname, err := tunDev.Name()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ipt4, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
v6err := checkIPv6(logf)
|
|
if v6err != nil {
|
|
logf("disabling tunneled IPv6 due to system IPv6 config: %v", v6err)
|
|
}
|
|
supportsV6 := v6err == nil
|
|
supportsV6NAT := supportsV6 && supportsV6NAT()
|
|
if supportsV6 {
|
|
logf("v6nat = %v", supportsV6NAT)
|
|
}
|
|
|
|
var ipt6 netfilterRunner
|
|
if supportsV6 {
|
|
// The iptables package probes for `ip6tables` and errors out
|
|
// if unavailable. We want that to be a non-fatal error.
|
|
ipt6, err = iptables.NewWithProtocol(iptables.ProtocolIPv6)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
cmd := osCommandRunner{
|
|
ambientCapNetAdmin: useAmbientCaps(),
|
|
}
|
|
|
|
return newUserspaceRouterAdvanced(logf, tunname, linkMon, ipt4, ipt6, cmd, supportsV6, supportsV6NAT)
|
|
}
|
|
|
|
func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, linkMon *monitor.Mon, netfilter4, netfilter6 netfilterRunner, cmd commandRunner, supportsV6, supportsV6NAT bool) (Router, error) {
|
|
r := &linuxRouter{
|
|
logf: logf,
|
|
tunname: tunname,
|
|
netfilterMode: netfilterOff,
|
|
linkMon: linkMon,
|
|
|
|
v6Available: supportsV6,
|
|
v6NATAvailable: supportsV6NAT,
|
|
|
|
ipt4: netfilter4,
|
|
ipt6: netfilter6,
|
|
cmd: cmd,
|
|
|
|
ipRuleFixLimiter: rate.NewLimiter(rate.Every(5*time.Second), 10),
|
|
}
|
|
if r.useIPCommand() {
|
|
r.ipRuleAvailable = (cmd.run("ip", "rule") == nil)
|
|
} else {
|
|
if rules, err := netlink.RuleList(netlink.FAMILY_V4); err != nil {
|
|
r.logf("error querying IP rules (does kernel have IP_MULTIPLE_TABLES?): %v", err)
|
|
r.logf("warning: running without policy routing")
|
|
} else {
|
|
r.logf("[v1] policy routing available; found %d rules", len(rules))
|
|
r.ipRuleAvailable = true
|
|
}
|
|
}
|
|
|
|
return r, nil
|
|
}
|
|
|
|
func useAmbientCaps() bool {
|
|
if distro.Get() != distro.Synology {
|
|
return false
|
|
}
|
|
return distro.DSMVersion() >= 7
|
|
}
|
|
|
|
var forceIPCommand = envknob.Bool("TS_DEBUG_USE_IP_COMMAND")
|
|
|
|
// useIPCommand reports whether r should use the "ip" command (or its
|
|
// fake commandRunner for tests) instead of netlink.
|
|
func (r *linuxRouter) useIPCommand() bool {
|
|
if r.cmd == nil {
|
|
panic("invalid init")
|
|
}
|
|
if forceIPCommand {
|
|
return true
|
|
}
|
|
// In the future we might need to fall back to using the "ip"
|
|
// command if, say, netlink is blocked somewhere but the ip
|
|
// command is allowed to use netlink. For now we only use the ip
|
|
// command runner in tests.
|
|
_, ok := r.cmd.(osCommandRunner)
|
|
return !ok
|
|
}
|
|
|
|
// onIPRuleDeleted is the callback from the link monitor for when an IP policy
|
|
// rule is deleted. See Issue 1591.
|
|
//
|
|
// If an ip rule is deleted (with pref number 52xx, as Tailscale sets), then
|
|
// set a timer to restore our rules, in case they were deleted. The timer lets
|
|
// us do one fixup in response to a batch of rule deletes. It also lets us
|
|
// delay arbitrarily to prevent a high-speed fight over the rule between
|
|
// competing processes. (Although empirically, systemd doesn't fight us
|
|
// like that... yet.)
|
|
//
|
|
// Note that we don't care about the table number. We don't strictly even care
|
|
// about the priority number. We could just do this in response to any netlink
|
|
// change. Filtering by known priority ranges cuts back on some logspam.
|
|
func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) {
|
|
if priority < 5200 || priority >= 5300 {
|
|
// Not our rule.
|
|
return
|
|
}
|
|
if !r.ruleRestorePending.Swap(true) {
|
|
// Another timer is already pending.
|
|
return
|
|
}
|
|
rr := r.ipRuleFixLimiter.Reserve()
|
|
if !rr.OK() {
|
|
r.ruleRestorePending.Swap(false)
|
|
return
|
|
}
|
|
time.AfterFunc(rr.Delay()+250*time.Millisecond, func() {
|
|
if r.ruleRestorePending.Swap(false) && !r.closed.Load() {
|
|
r.logf("somebody (likely systemd-networkd) deleted ip rules; restoring Tailscale's")
|
|
r.justAddIPRules()
|
|
}
|
|
})
|
|
}
|
|
|
|
func (r *linuxRouter) Up() error {
|
|
if r.unregLinkMon == nil && r.linkMon != nil {
|
|
r.unregLinkMon = r.linkMon.RegisterRuleDeleteCallback(r.onIPRuleDeleted)
|
|
}
|
|
if err := r.addIPRules(); err != nil {
|
|
return fmt.Errorf("adding IP rules: %w", err)
|
|
}
|
|
if err := r.setNetfilterMode(netfilterOff); err != nil {
|
|
return fmt.Errorf("setting netfilter mode: %w", err)
|
|
}
|
|
if err := r.upInterface(); err != nil {
|
|
return fmt.Errorf("bringing interface up: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r *linuxRouter) Close() error {
|
|
r.closed.Store(true)
|
|
if r.unregLinkMon != nil {
|
|
r.unregLinkMon()
|
|
}
|
|
if err := r.downInterface(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delIPRules(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.setNetfilterMode(netfilterOff); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delRoutes(); err != nil {
|
|
return err
|
|
}
|
|
|
|
r.addrs = nil
|
|
r.routes = nil
|
|
r.localRoutes = nil
|
|
|
|
return nil
|
|
}
|
|
|
|
// Set implements the Router interface.
|
|
func (r *linuxRouter) Set(cfg *Config) error {
|
|
var errs []error
|
|
if cfg == nil {
|
|
cfg = &shutdownConfig
|
|
}
|
|
|
|
if err := r.setNetfilterMode(cfg.NetfilterMode); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
|
|
newLocalRoutes, err := cidrDiff("localRoute", r.localRoutes, cfg.LocalRoutes, r.addThrowRoute, r.delThrowRoute, r.logf)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
r.localRoutes = newLocalRoutes
|
|
|
|
newRoutes, err := cidrDiff("route", r.routes, cfg.Routes, r.addRoute, r.delRoute, r.logf)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
r.routes = newRoutes
|
|
|
|
newAddrs, err := cidrDiff("addr", r.addrs, cfg.LocalAddrs, r.addAddress, r.delAddress, r.logf)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
r.addrs = newAddrs
|
|
|
|
switch {
|
|
case cfg.SNATSubnetRoutes == r.snatSubnetRoutes:
|
|
// state already correct, nothing to do.
|
|
case cfg.SNATSubnetRoutes:
|
|
if err := r.addSNATRule(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
default:
|
|
if err := r.delSNATRule(); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
r.snatSubnetRoutes = cfg.SNATSubnetRoutes
|
|
|
|
return multierr.New(errs...)
|
|
}
|
|
|
|
// setNetfilterMode switches the router to the given netfilter
|
|
// mode. Netfilter state is created or deleted appropriately to
|
|
// reflect the new mode, and r.snatSubnetRoutes is updated to reflect
|
|
// the current state of subnet SNATing.
|
|
func (r *linuxRouter) setNetfilterMode(mode preftype.NetfilterMode) error {
|
|
if distro.Get() == distro.Synology {
|
|
mode = netfilterOff
|
|
}
|
|
if r.netfilterMode == mode {
|
|
return nil
|
|
}
|
|
|
|
// Depending on the netfilter mode we switch from and to, we may
|
|
// have created the Tailscale netfilter chains. If so, we have to
|
|
// go back through existing router state, and add the netfilter
|
|
// rules for that state.
|
|
//
|
|
// This bool keeps track of whether the current state transition
|
|
// is one that requires adding rules of existing state.
|
|
reprocess := false
|
|
|
|
switch mode {
|
|
case netfilterOff:
|
|
switch r.netfilterMode {
|
|
case netfilterNoDivert:
|
|
if err := r.delNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delNetfilterChains(); err != nil {
|
|
r.logf("note: %v", err)
|
|
// harmless, continue.
|
|
// This can happen if someone left a ref to
|
|
// this table somewhere else.
|
|
}
|
|
case netfilterOn:
|
|
if err := r.delNetfilterHooks(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delNetfilterChains(); err != nil {
|
|
r.logf("note: %v", err)
|
|
// harmless, continue.
|
|
// This can happen if someone left a ref to
|
|
// this table somewhere else.
|
|
}
|
|
}
|
|
r.snatSubnetRoutes = false
|
|
case netfilterNoDivert:
|
|
switch r.netfilterMode {
|
|
case netfilterOff:
|
|
reprocess = true
|
|
if err := r.addNetfilterChains(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.addNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
r.snatSubnetRoutes = false
|
|
case netfilterOn:
|
|
if err := r.delNetfilterHooks(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
case netfilterOn:
|
|
// Because of bugs in old version of iptables-compat,
|
|
// we can't add a "-j ts-forward" rule to FORWARD
|
|
// while ts-forward contains an "-m mark" rule. But
|
|
// we can add the row *before* populating ts-forward.
|
|
// So we have to delNetFilterBase, then add the hooks,
|
|
// then re-addNetFilterBase, just in case.
|
|
switch r.netfilterMode {
|
|
case netfilterOff:
|
|
reprocess = true
|
|
if err := r.addNetfilterChains(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.delNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.addNetfilterHooks(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.addNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
r.snatSubnetRoutes = false
|
|
case netfilterNoDivert:
|
|
reprocess = true
|
|
if err := r.delNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.addNetfilterHooks(); err != nil {
|
|
return err
|
|
}
|
|
if err := r.addNetfilterBase(); err != nil {
|
|
return err
|
|
}
|
|
r.snatSubnetRoutes = false
|
|
}
|
|
default:
|
|
panic("unhandled netfilter mode")
|
|
}
|
|
|
|
r.netfilterMode = mode
|
|
|
|
if !reprocess {
|
|
return nil
|
|
}
|
|
|
|
for cidr := range r.addrs {
|
|
if err := r.addLoopbackRule(cidr.Addr()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// addAddress adds an IP/mask to the tunnel interface. Fails if the
|
|
// address is already assigned to the interface, or if the addition
|
|
// fails.
|
|
func (r *linuxRouter) addAddress(addr netip.Prefix) error {
|
|
if !r.v6Available && addr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
if err := r.cmd.run("ip", "addr", "add", addr.String(), "dev", r.tunname); err != nil {
|
|
return fmt.Errorf("adding address %q to tunnel interface: %w", addr, err)
|
|
}
|
|
} else {
|
|
link, err := r.link()
|
|
if err != nil {
|
|
return fmt.Errorf("adding address %v, %w", addr, err)
|
|
}
|
|
if err := netlink.AddrReplace(link, nlAddrOfPrefix(addr)); err != nil {
|
|
return fmt.Errorf("adding address %v from tunnel interface: %w", addr, err)
|
|
}
|
|
}
|
|
if err := r.addLoopbackRule(addr.Addr()); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// delAddress removes an IP/mask from the tunnel interface. Fails if
|
|
// the address is not assigned to the interface, or if the removal
|
|
// fails.
|
|
func (r *linuxRouter) delAddress(addr netip.Prefix) error {
|
|
if !r.v6Available && addr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if err := r.delLoopbackRule(addr.Addr()); err != nil {
|
|
return err
|
|
}
|
|
if r.useIPCommand() {
|
|
if err := r.cmd.run("ip", "addr", "del", addr.String(), "dev", r.tunname); err != nil {
|
|
return fmt.Errorf("deleting address %q from tunnel interface: %w", addr, err)
|
|
}
|
|
} else {
|
|
link, err := r.link()
|
|
if err != nil {
|
|
return fmt.Errorf("deleting address %v, %w", addr, err)
|
|
}
|
|
if err := netlink.AddrDel(link, nlAddrOfPrefix(addr)); err != nil {
|
|
return fmt.Errorf("deleting address %v from tunnel interface: %w", addr, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addLoopbackRule adds a firewall rule to permit loopback traffic to
|
|
// a local Tailscale IP.
|
|
func (r *linuxRouter) addLoopbackRule(addr netip.Addr) error {
|
|
if r.netfilterMode == netfilterOff {
|
|
return nil
|
|
}
|
|
|
|
nf := r.ipt4
|
|
if addr.Is6() {
|
|
if !r.v6Available {
|
|
// IPv6 not available, ignore.
|
|
return nil
|
|
}
|
|
nf = r.ipt6
|
|
}
|
|
|
|
if err := nf.Insert("filter", "ts-input", 1, "-i", "lo", "-s", addr.String(), "-j", "ACCEPT"); err != nil {
|
|
return fmt.Errorf("adding loopback allow rule for %q: %w", addr, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// delLoopbackRule removes the firewall rule permitting loopback
|
|
// traffic to a Tailscale IP.
|
|
func (r *linuxRouter) delLoopbackRule(addr netip.Addr) error {
|
|
if r.netfilterMode == netfilterOff {
|
|
return nil
|
|
}
|
|
|
|
nf := r.ipt4
|
|
if addr.Is6() {
|
|
if !r.v6Available {
|
|
// IPv6 not available, ignore.
|
|
return nil
|
|
}
|
|
nf = r.ipt6
|
|
}
|
|
|
|
if err := nf.Delete("filter", "ts-input", "-i", "lo", "-s", addr.String(), "-j", "ACCEPT"); err != nil {
|
|
return fmt.Errorf("deleting loopback allow rule for %q: %w", addr, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addRoute adds a route for cidr, pointing to the tunnel
|
|
// interface. Fails if the route already exists, or if adding the
|
|
// route fails.
|
|
func (r *linuxRouter) addRoute(cidr netip.Prefix) error {
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.addRouteDef([]string{normalizeCIDR(cidr), "dev", r.tunname}, cidr)
|
|
}
|
|
linkIndex, err := r.linkIndex()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return netlink.RouteReplace(&netlink.Route{
|
|
LinkIndex: linkIndex,
|
|
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
|
Table: r.routeTable(),
|
|
})
|
|
}
|
|
|
|
// addThrowRoute adds a throw route for the provided cidr.
|
|
// This has the effect that lookup in the routing table is terminated
|
|
// pretending that no route was found. Fails if the route already exists,
|
|
// or if adding the route fails.
|
|
func (r *linuxRouter) addThrowRoute(cidr netip.Prefix) error {
|
|
if !r.ipRuleAvailable {
|
|
return nil
|
|
}
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.addRouteDef([]string{"throw", normalizeCIDR(cidr)}, cidr)
|
|
}
|
|
err := netlink.RouteReplace(&netlink.Route{
|
|
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
|
Table: tailscaleRouteTable.num,
|
|
Type: unix.RTN_THROW,
|
|
})
|
|
if err != nil {
|
|
r.logf("THROW ERROR adding %v: %#v", cidr, err)
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (r *linuxRouter) addRouteDef(routeDef []string, cidr netip.Prefix) error {
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
args := append([]string{"ip", "route", "add"}, routeDef...)
|
|
if r.ipRuleAvailable {
|
|
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
|
}
|
|
err := r.cmd.run(args...)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
// This is an ugly hack to detect failure to add a route that
|
|
// already exists (as happens in when we're racing to add
|
|
// kernel-maintained routes when enabling exit nodes w/o Local
|
|
// LAN access, Issue 3060). Fortunately in the common case we
|
|
// use netlink directly instead and don't exercise this code.
|
|
if errCode(err) == 2 && strings.Contains(err.Error(), "RTNETLINK answers: File exists") {
|
|
r.logf("ignoring route add of %v; already exists", cidr)
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
var (
|
|
errESRCH error = syscall.ESRCH
|
|
errENOENT error = syscall.ENOENT
|
|
errEEXIST error = syscall.EEXIST
|
|
)
|
|
|
|
// delRoute removes the route for cidr pointing to the tunnel
|
|
// interface. Fails if the route doesn't exist, or if removing the
|
|
// route fails.
|
|
func (r *linuxRouter) delRoute(cidr netip.Prefix) error {
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.delRouteDef([]string{normalizeCIDR(cidr), "dev", r.tunname}, cidr)
|
|
}
|
|
linkIndex, err := r.linkIndex()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = netlink.RouteDel(&netlink.Route{
|
|
LinkIndex: linkIndex,
|
|
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
|
Table: r.routeTable(),
|
|
})
|
|
if errors.Is(err, errESRCH) {
|
|
// Didn't exist to begin with.
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
// delThrowRoute removes the throw route for the cidr. Fails if the route
|
|
// doesn't exist, or if removing the route fails.
|
|
func (r *linuxRouter) delThrowRoute(cidr netip.Prefix) error {
|
|
if !r.ipRuleAvailable {
|
|
return nil
|
|
}
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.delRouteDef([]string{"throw", normalizeCIDR(cidr)}, cidr)
|
|
}
|
|
err := netlink.RouteDel(&netlink.Route{
|
|
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
|
Table: r.routeTable(),
|
|
Type: unix.RTN_THROW,
|
|
})
|
|
if errors.Is(err, errESRCH) {
|
|
// Didn't exist to begin with.
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (r *linuxRouter) delRouteDef(routeDef []string, cidr netip.Prefix) error {
|
|
if !r.v6Available && cidr.Addr().Is6() {
|
|
return nil
|
|
}
|
|
args := append([]string{"ip", "route", "del"}, routeDef...)
|
|
if r.ipRuleAvailable {
|
|
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
|
}
|
|
err := r.cmd.run(args...)
|
|
if err != nil {
|
|
ok, err := r.hasRoute(routeDef, cidr)
|
|
if err != nil {
|
|
r.logf("warning: error checking whether %v even exists after error deleting it: %v", err)
|
|
} else {
|
|
if !ok {
|
|
r.logf("warning: tried to delete route %v but it was already gone; ignoring error", cidr)
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
func dashFam(ip netip.Addr) string {
|
|
if ip.Is6() {
|
|
return "-6"
|
|
}
|
|
return "-4"
|
|
}
|
|
|
|
func (r *linuxRouter) hasRoute(routeDef []string, cidr netip.Prefix) (bool, error) {
|
|
args := append([]string{"ip", dashFam(cidr.Addr()), "route", "show"}, routeDef...)
|
|
if r.ipRuleAvailable {
|
|
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
|
}
|
|
out, err := r.cmd.output(args...)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return len(out) > 0, nil
|
|
}
|
|
|
|
func (r *linuxRouter) link() (netlink.Link, error) {
|
|
link, err := netlink.LinkByName(r.tunname)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to look up link %q: %w", r.tunname, err)
|
|
}
|
|
return link, nil
|
|
}
|
|
|
|
func (r *linuxRouter) linkIndex() (int, error) {
|
|
// TODO(bradfitz): cache this? It doesn't change often, and on start-up
|
|
// hundreds of addRoute calls to add /32s can happen quickly.
|
|
link, err := r.link()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return link.Attrs().Index, nil
|
|
}
|
|
|
|
// routeTable returns the route table to use.
|
|
func (r *linuxRouter) routeTable() int {
|
|
if r.ipRuleAvailable {
|
|
return tailscaleRouteTable.num
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// upInterface brings up the tunnel interface.
|
|
func (r *linuxRouter) upInterface() error {
|
|
if r.useIPCommand() {
|
|
return r.cmd.run("ip", "link", "set", "dev", r.tunname, "up")
|
|
}
|
|
link, err := r.link()
|
|
if err != nil {
|
|
return fmt.Errorf("bringing interface up, %w", err)
|
|
}
|
|
return netlink.LinkSetUp(link)
|
|
}
|
|
|
|
// downInterface sets the tunnel interface administratively down.
|
|
func (r *linuxRouter) downInterface() error {
|
|
if r.useIPCommand() {
|
|
return r.cmd.run("ip", "link", "set", "dev", r.tunname, "down")
|
|
}
|
|
link, err := r.link()
|
|
if err != nil {
|
|
return fmt.Errorf("bringing interface down, %w", err)
|
|
}
|
|
return netlink.LinkSetDown(link)
|
|
}
|
|
|
|
// addrFamily is an address family: IPv4 or IPv6.
|
|
type addrFamily byte
|
|
|
|
const (
|
|
v4 = addrFamily(4)
|
|
v6 = addrFamily(6)
|
|
)
|
|
|
|
func (f addrFamily) dashArg() string {
|
|
switch f {
|
|
case 4:
|
|
return "-4"
|
|
case 6:
|
|
return "-6"
|
|
}
|
|
panic("illegal")
|
|
}
|
|
|
|
func (f addrFamily) netlinkInt() int {
|
|
switch f {
|
|
case 4:
|
|
return netlink.FAMILY_V4
|
|
case 6:
|
|
return netlink.FAMILY_V6
|
|
}
|
|
panic("illegal")
|
|
}
|
|
|
|
func (r *linuxRouter) addrFamilies() []addrFamily {
|
|
if r.v6Available {
|
|
return []addrFamily{v4, v6}
|
|
}
|
|
return []addrFamily{v4}
|
|
}
|
|
|
|
// addIPRules adds the policy routing rule that avoids tailscaled
|
|
// routing loops. If the rule exists and appears to be a
|
|
// tailscale-managed rule, it is gracefully replaced.
|
|
func (r *linuxRouter) addIPRules() error {
|
|
if !r.ipRuleAvailable {
|
|
return nil
|
|
}
|
|
|
|
// Clear out old rules. After that, any error adding a rule is fatal,
|
|
// because there should be no reason we add a duplicate.
|
|
if err := r.delIPRules(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return r.justAddIPRules()
|
|
}
|
|
|
|
// routeTable is a Linux routing table: both its name and number.
|
|
// See /etc/iproute2/rt_tables.
|
|
type routeTable struct {
|
|
name string
|
|
num int
|
|
}
|
|
|
|
// ipCmdArg returns the string form of the table to pass to the "ip" command.
|
|
func (rt routeTable) ipCmdArg() string {
|
|
if rt.num >= 253 {
|
|
return rt.name
|
|
}
|
|
return strconv.Itoa(rt.num)
|
|
}
|
|
|
|
var routeTableByNumber = map[int]routeTable{}
|
|
|
|
func newRouteTable(name string, num int) routeTable {
|
|
rt := routeTable{name, num}
|
|
routeTableByNumber[num] = rt
|
|
return rt
|
|
}
|
|
|
|
func mustRouteTable(num int) routeTable {
|
|
rt, ok := routeTableByNumber[num]
|
|
if !ok {
|
|
panic(fmt.Sprintf("unknown route table %v", num))
|
|
}
|
|
return rt
|
|
}
|
|
|
|
var (
|
|
mainRouteTable = newRouteTable("main", 254)
|
|
defaultRouteTable = newRouteTable("default", 253)
|
|
|
|
// tailscaleRouteTable is the routing table number for Tailscale
|
|
// network routes. See addIPRules for the detailed policy routing
|
|
// logic that ends up doing lookups within that table.
|
|
//
|
|
// NOTE(danderson): We chose 52 because those are the digits above the
|
|
// letters "TS" on a qwerty keyboard, and 52 is sufficiently unlikely
|
|
// to be picked by other software.
|
|
//
|
|
// NOTE(danderson): You might wonder why we didn't pick some
|
|
// high table number like 5252, to further avoid the potential
|
|
// for collisions with other software. Unfortunately,
|
|
// Busybox's `ip` implementation believes that table numbers
|
|
// are 8-bit integers, so for maximum compatibility we had to
|
|
// stay in the 0-255 range even though linux itself supports
|
|
// larger numbers. (but nowadays we use netlink directly and
|
|
// aren't affected by the busybox binary's limitations)
|
|
tailscaleRouteTable = newRouteTable("tailscale", 52)
|
|
)
|
|
|
|
// ipRules are the policy routing rules that Tailscale uses.
|
|
//
|
|
// NOTE(apenwarr): We leave spaces between each pref number.
|
|
// This is so the sysadmin can override by inserting rules in
|
|
// between if they want.
|
|
//
|
|
// NOTE(apenwarr): This sequence seems complicated, right?
|
|
// If we could simply have a rule that said "match packets that
|
|
// *don't* have this fwmark", then we would only need to add one
|
|
// link to table 52 and we'd be done. Unfortunately, older kernels
|
|
// and 'ip rule' implementations (including busybox), don't support
|
|
// checking for the lack of a fwmark, only the presence. The technique
|
|
// below works even on very old kernels.
|
|
var ipRules = []netlink.Rule{
|
|
// Packets from us, tagged with our fwmark, first try the kernel's
|
|
// main routing table.
|
|
{
|
|
Priority: 5210,
|
|
Mark: tailscaleBypassMarkNum,
|
|
Table: mainRouteTable.num,
|
|
},
|
|
// ...and then we try the 'default' table, for correctness,
|
|
// even though it's been empty on every Linux system I've ever seen.
|
|
{
|
|
Priority: 5230,
|
|
Mark: tailscaleBypassMarkNum,
|
|
Table: defaultRouteTable.num,
|
|
},
|
|
// If neither of those matched (no default route on this system?)
|
|
// then packets from us should be aborted rather than falling through
|
|
// to the tailscale routes, because that would create routing loops.
|
|
{
|
|
Priority: 5250,
|
|
Mark: tailscaleBypassMarkNum,
|
|
Type: unix.RTN_UNREACHABLE,
|
|
},
|
|
// If we get to this point, capture all packets and send them
|
|
// through to the tailscale route table. For apps other than us
|
|
// (ie. with no fwmark set), this is the first routing table, so
|
|
// it takes precedence over all the others, ie. VPN routes always
|
|
// beat non-VPN routes.
|
|
{
|
|
Priority: 5270,
|
|
Table: tailscaleRouteTable.num,
|
|
},
|
|
// If that didn't match, then non-fwmark packets fall through to the
|
|
// usual rules (pref 32766 and 32767, ie. main and default).
|
|
}
|
|
|
|
// justAddIPRules adds policy routing rule without deleting any first.
|
|
func (r *linuxRouter) justAddIPRules() error {
|
|
if !r.ipRuleAvailable {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.addIPRulesWithIPCommand()
|
|
}
|
|
var errAcc error
|
|
for _, family := range r.addrFamilies() {
|
|
for _, ru := range ipRules {
|
|
// Note: r is a value type here; safe to mutate it.
|
|
ru.Family = family.netlinkInt()
|
|
ru.Mask = -1
|
|
ru.Goto = -1
|
|
ru.SuppressIfgroup = -1
|
|
ru.SuppressPrefixlen = -1
|
|
ru.Flow = -1
|
|
|
|
err := netlink.RuleAdd(&ru)
|
|
if errors.Is(err, errEEXIST) {
|
|
// Ignore dups.
|
|
continue
|
|
}
|
|
if err != nil && errAcc == nil {
|
|
errAcc = err
|
|
}
|
|
}
|
|
}
|
|
return errAcc
|
|
}
|
|
|
|
func (r *linuxRouter) addIPRulesWithIPCommand() error {
|
|
rg := newRunGroup(nil, r.cmd)
|
|
|
|
for _, family := range r.addrFamilies() {
|
|
for _, r := range ipRules {
|
|
args := []string{
|
|
"ip", family.dashArg(),
|
|
"rule", "add",
|
|
"pref", strconv.Itoa(r.Priority),
|
|
}
|
|
if r.Mark != 0 {
|
|
args = append(args, "fwmark", fmt.Sprintf("0x%x", r.Mark))
|
|
}
|
|
if r.Table != 0 {
|
|
args = append(args, "table", mustRouteTable(r.Table).ipCmdArg())
|
|
}
|
|
if r.Type == unix.RTN_UNREACHABLE {
|
|
args = append(args, "type", "unreachable")
|
|
}
|
|
rg.Run(args...)
|
|
}
|
|
}
|
|
|
|
return rg.ErrAcc
|
|
}
|
|
|
|
// delRoutes removes any local routes that we added that would not be
|
|
// cleaned up on interface down.
|
|
func (r *linuxRouter) delRoutes() error {
|
|
for rt := range r.localRoutes {
|
|
if err := r.delThrowRoute(rt); err != nil {
|
|
r.logf("failed to delete throw route(%q): %v", rt, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// delIPRules removes the policy routing rules that avoid
|
|
// tailscaled routing loops, if it exists.
|
|
func (r *linuxRouter) delIPRules() error {
|
|
if !r.ipRuleAvailable {
|
|
return nil
|
|
}
|
|
if r.useIPCommand() {
|
|
return r.delIPRulesWithIPCommand()
|
|
}
|
|
var errAcc error
|
|
for _, family := range r.addrFamilies() {
|
|
for _, ru := range ipRules {
|
|
// Note: r is a value type here; safe to mutate it.
|
|
// When deleting rules, we want to be a bit specific (mention which
|
|
// table we were routing to) but not *too* specific (fwmarks, etc).
|
|
// That leaves us some flexibility to change these values in later
|
|
// versions without having ongoing hacks for every possible
|
|
// combination.
|
|
ru.Family = family.netlinkInt()
|
|
ru.Mark = -1
|
|
ru.Mask = -1
|
|
ru.Goto = -1
|
|
ru.SuppressIfgroup = -1
|
|
ru.SuppressPrefixlen = -1
|
|
|
|
err := netlink.RuleDel(&ru)
|
|
if errors.Is(err, errENOENT) {
|
|
// Didn't exist to begin with.
|
|
continue
|
|
}
|
|
if err != nil && errAcc == nil {
|
|
errAcc = err
|
|
}
|
|
}
|
|
}
|
|
return errAcc
|
|
}
|
|
|
|
func (r *linuxRouter) delIPRulesWithIPCommand() error {
|
|
// Error codes: 'ip rule' returns error code 2 if the rule is a
|
|
// duplicate (add) or not found (del). It returns a different code
|
|
// for syntax errors. This is also true of busybox.
|
|
//
|
|
// Some older versions of iproute2 also return error code 254 for
|
|
// unknown rules during deletion.
|
|
rg := newRunGroup([]int{2, 254}, r.cmd)
|
|
|
|
for _, family := range r.addrFamilies() {
|
|
// When deleting rules, we want to be a bit specific (mention which
|
|
// table we were routing to) but not *too* specific (fwmarks, etc).
|
|
// That leaves us some flexibility to change these values in later
|
|
// versions without having ongoing hacks for every possible
|
|
// combination.
|
|
for _, r := range ipRules {
|
|
args := []string{
|
|
"ip", family.dashArg(),
|
|
"rule", "del",
|
|
"pref", strconv.Itoa(r.Priority),
|
|
}
|
|
if r.Table != 0 {
|
|
args = append(args, "table", mustRouteTable(r.Table).ipCmdArg())
|
|
} else {
|
|
args = append(args, "type", "unreachable")
|
|
}
|
|
rg.Run(args...)
|
|
}
|
|
}
|
|
|
|
return rg.ErrAcc
|
|
}
|
|
|
|
func (r *linuxRouter) netfilterFamilies() []netfilterRunner {
|
|
if r.v6Available {
|
|
return []netfilterRunner{r.ipt4, r.ipt6}
|
|
}
|
|
return []netfilterRunner{r.ipt4}
|
|
}
|
|
|
|
// addNetfilterChains creates custom Tailscale chains in netfilter.
|
|
func (r *linuxRouter) addNetfilterChains() error {
|
|
create := func(ipt netfilterRunner, table, chain string) error {
|
|
err := ipt.ClearChain(table, chain)
|
|
if errCode(err) == 1 {
|
|
// nonexistent chain. let's create it!
|
|
return ipt.NewChain(table, chain)
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("setting up %s/%s: %w", table, chain, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, ipt := range r.netfilterFamilies() {
|
|
if err := create(ipt, "filter", "ts-input"); err != nil {
|
|
return err
|
|
}
|
|
if err := create(ipt, "filter", "ts-forward"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := create(r.ipt4, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := create(r.ipt6, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addNetfilterBase adds some basic processing rules to be
|
|
// supplemented by later calls to other helpers.
|
|
func (r *linuxRouter) addNetfilterBase() error {
|
|
if err := r.addNetfilterBase4(); err != nil {
|
|
return err
|
|
}
|
|
if r.v6Available {
|
|
if err := r.addNetfilterBase6(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addNetfilterBase4 adds some basic IPv4 processing rules to be
|
|
// supplemented by later calls to other helpers.
|
|
func (r *linuxRouter) addNetfilterBase4() error {
|
|
// Only allow CGNAT range traffic to come from tailscale0. There
|
|
// is an exception carved out for ranges used by ChromeOS, for
|
|
// which we fall out of the Tailscale chain.
|
|
//
|
|
// Note, this will definitely break nodes that end up using the
|
|
// CGNAT range for other purposes :(.
|
|
args := []string{"!", "-i", r.tunname, "-s", tsaddr.ChromeOSVMRange().String(), "-j", "RETURN"}
|
|
if err := r.ipt4.Append("filter", "ts-input", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-input: %w", args, err)
|
|
}
|
|
args = []string{"!", "-i", r.tunname, "-s", tsaddr.CGNATRange().String(), "-j", "DROP"}
|
|
if err := r.ipt4.Append("filter", "ts-input", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-input: %w", args, err)
|
|
}
|
|
|
|
// Forward all traffic from the Tailscale interface, and drop
|
|
// traffic to the tailscale interface by default. We use packet
|
|
// marks here so both filter/FORWARD and nat/POSTROUTING can match
|
|
// on these packets of interest.
|
|
//
|
|
// In particular, we only want to apply SNAT rules in
|
|
// nat/POSTROUTING to packets that originated from the Tailscale
|
|
// interface, but we can't match on the inbound interface in
|
|
// POSTROUTING. So instead, we match on the inbound interface in
|
|
// filter/FORWARD, and set a packet mark that nat/POSTROUTING can
|
|
// use to effectively run that same test again.
|
|
args = []string{"-i", r.tunname, "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark}
|
|
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-forward: %w", args, err)
|
|
}
|
|
args = []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "ACCEPT"}
|
|
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-forward: %w", args, err)
|
|
}
|
|
args = []string{"-o", r.tunname, "-s", tsaddr.CGNATRange().String(), "-j", "DROP"}
|
|
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-forward: %w", args, err)
|
|
}
|
|
args = []string{"-o", r.tunname, "-j", "ACCEPT"}
|
|
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/filter/ts-forward: %w", args, err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// addNetfilterBase4 adds some basic IPv6 processing rules to be
|
|
// supplemented by later calls to other helpers.
|
|
func (r *linuxRouter) addNetfilterBase6() error {
|
|
// TODO: only allow traffic from Tailscale's ULA range to come
|
|
// from tailscale0.
|
|
|
|
args := []string{"-i", r.tunname, "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark}
|
|
if err := r.ipt6.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v6/filter/ts-forward: %w", args, err)
|
|
}
|
|
args = []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "ACCEPT"}
|
|
if err := r.ipt6.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v6/filter/ts-forward: %w", args, err)
|
|
}
|
|
// TODO: drop forwarded traffic to tailscale0 from tailscale's ULA
|
|
// (see corresponding IPv4 CGNAT rule).
|
|
args = []string{"-o", r.tunname, "-j", "ACCEPT"}
|
|
if err := r.ipt6.Append("filter", "ts-forward", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v6/filter/ts-forward: %w", args, err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// delNetfilterChains removes the custom Tailscale chains from netfilter.
|
|
func (r *linuxRouter) delNetfilterChains() error {
|
|
del := func(ipt netfilterRunner, table, chain string) error {
|
|
if err := ipt.ClearChain(table, chain); err != nil {
|
|
if errCode(err) == 1 {
|
|
// nonexistent chain. That's fine, since it's
|
|
// the desired state anyway.
|
|
return nil
|
|
}
|
|
return fmt.Errorf("flushing %s/%s: %w", table, chain, err)
|
|
}
|
|
if err := ipt.DeleteChain(table, chain); err != nil {
|
|
// this shouldn't fail, because if the chain didn't
|
|
// exist, we would have returned after ClearChain.
|
|
return fmt.Errorf("deleting %s/%s: %v", table, chain, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, ipt := range r.netfilterFamilies() {
|
|
if err := del(ipt, "filter", "ts-input"); err != nil {
|
|
return err
|
|
}
|
|
if err := del(ipt, "filter", "ts-forward"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := del(r.ipt4, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := del(r.ipt6, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// delNetfilterBase empties but does not remove custom Tailscale chains from
|
|
// netfilter.
|
|
func (r *linuxRouter) delNetfilterBase() error {
|
|
del := func(ipt netfilterRunner, table, chain string) error {
|
|
if err := ipt.ClearChain(table, chain); err != nil {
|
|
if errCode(err) == 1 {
|
|
// nonexistent chain. That's fine, since it's
|
|
// the desired state anyway.
|
|
return nil
|
|
}
|
|
return fmt.Errorf("flushing %s/%s: %w", table, chain, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, ipt := range r.netfilterFamilies() {
|
|
if err := del(ipt, "filter", "ts-input"); err != nil {
|
|
return err
|
|
}
|
|
if err := del(ipt, "filter", "ts-forward"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := del(r.ipt4, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := del(r.ipt6, "nat", "ts-postrouting"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// addNetfilterHooks inserts calls to tailscale's netfilter chains in
|
|
// the relevant main netfilter chains. The tailscale chains must
|
|
// already exist.
|
|
func (r *linuxRouter) addNetfilterHooks() error {
|
|
divert := func(ipt netfilterRunner, table, chain string) error {
|
|
tsChain := tsChain(chain)
|
|
|
|
args := []string{"-j", tsChain}
|
|
exists, err := ipt.Exists(table, chain, args...)
|
|
if err != nil {
|
|
return fmt.Errorf("checking for %v in %s/%s: %w", args, table, chain, err)
|
|
}
|
|
if exists {
|
|
return nil
|
|
}
|
|
if err := ipt.Insert(table, chain, 1, args...); err != nil {
|
|
return fmt.Errorf("adding %v in %s/%s: %w", args, table, chain, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, ipt := range r.netfilterFamilies() {
|
|
if err := divert(ipt, "filter", "INPUT"); err != nil {
|
|
return err
|
|
}
|
|
if err := divert(ipt, "filter", "FORWARD"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := divert(r.ipt4, "nat", "POSTROUTING"); err != nil {
|
|
return err
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := divert(r.ipt6, "nat", "POSTROUTING"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// delNetfilterHooks deletes the calls to tailscale's netfilter chains
|
|
// in the relevant main netfilter chains.
|
|
func (r *linuxRouter) delNetfilterHooks() error {
|
|
del := func(ipt netfilterRunner, table, chain string) error {
|
|
tsChain := tsChain(chain)
|
|
args := []string{"-j", tsChain}
|
|
if err := ipt.Delete(table, chain, args...); err != nil {
|
|
// TODO(apenwarr): check for errCode(1) here.
|
|
// Unfortunately the error code from the iptables
|
|
// module resists unwrapping, unlike with other
|
|
// calls. So we have to assume if Delete fails,
|
|
// it's because there is no such rule.
|
|
r.logf("note: deleting %v in %s/%s: %w", args, table, chain, err)
|
|
return nil
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, ipt := range r.netfilterFamilies() {
|
|
if err := del(ipt, "filter", "INPUT"); err != nil {
|
|
return err
|
|
}
|
|
if err := del(ipt, "filter", "FORWARD"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := del(r.ipt4, "nat", "POSTROUTING"); err != nil {
|
|
return err
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := del(r.ipt6, "nat", "POSTROUTING"); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addSNATRule adds a netfilter rule to SNAT traffic destined for
|
|
// local subnets.
|
|
func (r *linuxRouter) addSNATRule() error {
|
|
if r.netfilterMode == netfilterOff {
|
|
return nil
|
|
}
|
|
|
|
args := []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "MASQUERADE"}
|
|
if err := r.ipt4.Append("nat", "ts-postrouting", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v4/nat/ts-postrouting: %w", args, err)
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := r.ipt6.Append("nat", "ts-postrouting", args...); err != nil {
|
|
return fmt.Errorf("adding %v in v6/nat/ts-postrouting: %w", args, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// delSNATRule removes the netfilter rule to SNAT traffic destined for
|
|
// local subnets. Fails if the rule does not exist.
|
|
func (r *linuxRouter) delSNATRule() error {
|
|
if r.netfilterMode == netfilterOff {
|
|
return nil
|
|
}
|
|
|
|
args := []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "MASQUERADE"}
|
|
if err := r.ipt4.Delete("nat", "ts-postrouting", args...); err != nil {
|
|
return fmt.Errorf("deleting %v in v4/nat/ts-postrouting: %w", args, err)
|
|
}
|
|
if r.v6NATAvailable {
|
|
if err := r.ipt6.Delete("nat", "ts-postrouting", args...); err != nil {
|
|
return fmt.Errorf("deleting %v in v6/nat/ts-postrouting: %w", args, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// cidrDiff calls add and del as needed to make the set of prefixes in
|
|
// old and new match. Returns a map reflecting the actual new state
|
|
// (which may be somewhere in between old and new if some commands
|
|
// failed), and any error encountered while reconfiguring.
|
|
func cidrDiff(kind string, old map[netip.Prefix]bool, new []netip.Prefix, add, del func(netip.Prefix) error, logf logger.Logf) (map[netip.Prefix]bool, error) {
|
|
newMap := make(map[netip.Prefix]bool, len(new))
|
|
for _, cidr := range new {
|
|
newMap[cidr] = true
|
|
}
|
|
|
|
// ret starts out as a copy of old, and updates as we
|
|
// add/delete. That way we can always return it and have it be the
|
|
// true state of what we've done so far.
|
|
ret := make(map[netip.Prefix]bool, len(old))
|
|
for cidr := range old {
|
|
ret[cidr] = true
|
|
}
|
|
|
|
var delFail []error
|
|
for cidr := range old {
|
|
if newMap[cidr] {
|
|
continue
|
|
}
|
|
if err := del(cidr); err != nil {
|
|
logf("%s del failed: %v", kind, err)
|
|
delFail = append(delFail, err)
|
|
} else {
|
|
delete(ret, cidr)
|
|
}
|
|
}
|
|
if len(delFail) == 1 {
|
|
return ret, delFail[0]
|
|
}
|
|
if len(delFail) > 0 {
|
|
return ret, fmt.Errorf("%d delete %s failures; first was: %w", len(delFail), kind, delFail[0])
|
|
}
|
|
|
|
var addFail []error
|
|
for cidr := range newMap {
|
|
if old[cidr] {
|
|
continue
|
|
}
|
|
if err := add(cidr); err != nil {
|
|
logf("%s add failed: %v", kind, err)
|
|
addFail = append(addFail, err)
|
|
} else {
|
|
ret[cidr] = true
|
|
}
|
|
}
|
|
|
|
if len(addFail) == 1 {
|
|
return ret, addFail[0]
|
|
}
|
|
if len(addFail) > 0 {
|
|
return ret, fmt.Errorf("%d add %s failures; first was: %w", len(addFail), kind, addFail[0])
|
|
}
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
// tsChain returns the name of the tailscale sub-chain corresponding
|
|
// to the given "parent" chain (e.g. INPUT, FORWARD, ...).
|
|
func tsChain(chain string) string {
|
|
return "ts-" + strings.ToLower(chain)
|
|
}
|
|
|
|
// normalizeCIDR returns cidr as an ip/mask string, with the host bits
|
|
// of the IP address zeroed out.
|
|
func normalizeCIDR(cidr netip.Prefix) string {
|
|
return cidr.Masked().String()
|
|
}
|
|
|
|
func cleanup(logf logger.Logf, interfaceName string) {
|
|
// TODO(dmytro): clean up iptables.
|
|
}
|
|
|
|
// checkIPv6 checks whether the system appears to have a working IPv6
|
|
// network stack. It returns an error explaining what looks wrong or
|
|
// missing. It does not check that IPv6 is currently functional or
|
|
// that there's a global address, just that the system would support
|
|
// IPv6 if it were on an IPv6 network.
|
|
func checkIPv6(logf logger.Logf) error {
|
|
_, err := os.Stat("/proc/sys/net/ipv6")
|
|
if os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
bs, err := ioutil.ReadFile("/proc/sys/net/ipv6/conf/all/disable_ipv6")
|
|
if err != nil {
|
|
// Be conservative if we can't find the ipv6 configuration knob.
|
|
return err
|
|
}
|
|
disabled, err := strconv.ParseBool(strings.TrimSpace(string(bs)))
|
|
if err != nil {
|
|
return errors.New("disable_ipv6 has invalid bool")
|
|
}
|
|
if disabled {
|
|
return errors.New("disable_ipv6 is set")
|
|
}
|
|
|
|
// Older kernels don't support IPv6 policy routing. Some kernels
|
|
// support policy routing but don't have this knob, so absence of
|
|
// the knob is not fatal.
|
|
bs, err = ioutil.ReadFile("/proc/sys/net/ipv6/conf/all/disable_policy")
|
|
if err == nil {
|
|
disabled, err = strconv.ParseBool(strings.TrimSpace(string(bs)))
|
|
if err != nil {
|
|
return errors.New("disable_policy has invalid bool")
|
|
}
|
|
if disabled {
|
|
return errors.New("disable_policy is set")
|
|
}
|
|
}
|
|
|
|
if err := checkIPRuleSupportsV6(logf); err != nil {
|
|
return fmt.Errorf("kernel doesn't support IPv6 policy routing: %w", err)
|
|
}
|
|
|
|
// Some distros ship ip6tables separately from iptables.
|
|
if _, err := exec.LookPath("ip6tables"); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// supportsV6NAT returns whether the system has a "nat" table in the
|
|
// IPv6 netfilter stack.
|
|
//
|
|
// The nat table was added after the initial release of ipv6
|
|
// netfilter, so some older distros ship a kernel that can't NAT IPv6
|
|
// traffic.
|
|
func supportsV6NAT() bool {
|
|
bs, err := ioutil.ReadFile("/proc/net/ip6_tables_names")
|
|
if err != nil {
|
|
// Can't read the file. Assume SNAT works.
|
|
return true
|
|
}
|
|
if bytes.Contains(bs, []byte("nat\n")) {
|
|
return true
|
|
}
|
|
// In nftables mode, that proc file will be empty. Try another thing:
|
|
if exec.Command("modprobe", "ip6table_nat").Run() == nil {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func checkIPRuleSupportsV6(logf logger.Logf) error {
|
|
// First try just a read-only operation to ideally avoid
|
|
// having to modify any state.
|
|
if rules, err := netlink.RuleList(netlink.FAMILY_V6); err != nil {
|
|
return fmt.Errorf("querying IPv6 policy routing rules: %w", err)
|
|
} else {
|
|
if len(rules) > 0 {
|
|
logf("[v1] kernel supports IPv6 policy routing (found %d rules)", len(rules))
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Try to actually create & delete one as a test.
|
|
rule := netlink.NewRule()
|
|
rule.Priority = 1234
|
|
rule.Mark = tailscaleBypassMarkNum
|
|
rule.Table = tailscaleRouteTable.num
|
|
rule.Family = netlink.FAMILY_V6
|
|
// First delete the rule unconditionally, and don't check for
|
|
// errors. This is just cleaning up anything that might be already
|
|
// there.
|
|
netlink.RuleDel(rule)
|
|
// And clean up on exit.
|
|
defer netlink.RuleDel(rule)
|
|
return netlink.RuleAdd(rule)
|
|
}
|
|
|
|
func nlAddrOfPrefix(p netip.Prefix) *netlink.Addr {
|
|
return &netlink.Addr{
|
|
IPNet: netipx.PrefixIPNet(p),
|
|
}
|
|
}
|