tailscale/net/dns/manager.go

558 lines
16 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package dns
import (
"bufio"
"context"
"encoding/binary"
"errors"
"io"
"net"
"net/netip"
"runtime"
"slices"
"strings"
"sync"
"sync/atomic"
"time"
xmaps "golang.org/x/exp/maps"
"tailscale.com/control/controlknobs"
"tailscale.com/health"
"tailscale.com/net/dns/resolver"
"tailscale.com/net/netmon"
"tailscale.com/net/tsdial"
"tailscale.com/syncs"
"tailscale.com/tstime/rate"
"tailscale.com/types/dnstype"
"tailscale.com/types/logger"
"tailscale.com/util/clientmetric"
"tailscale.com/util/dnsname"
)
var (
errFullQueue = errors.New("request queue full")
)
// maxActiveQueries returns the maximal number of DNS requests that can
// be running.
const maxActiveQueries = 256
// We use file-ignore below instead of ignore because on some platforms,
// the lint exception is necessary and on others it is not,
// and plain ignore complains if the exception is unnecessary.
// Manager manages system DNS settings.
type Manager struct {
logf logger.Logf
health *health.Tracker
activeQueriesAtomic int32
ctx context.Context // good until Down
ctxCancel context.CancelFunc // closes ctx
resolver *resolver.Resolver
os OSConfigurator
knobs *controlknobs.Knobs // or nil
goos string // if empty, gets set to runtime.GOOS
// The last configuration we successfully compiled. Set to nil if
// there was any failure applying the last configuration
config *Config
// Must be held when accessing/setting config.
mu sync.Mutex
}
// NewManagers created a new manager from the given config.
//
// knobs may be nil.
func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker, dialer *tsdial.Dialer, linkSel resolver.ForwardLinkSelector, knobs *controlknobs.Knobs, goos string) *Manager {
if dialer == nil {
panic("nil Dialer")
}
if dialer.NetMon() == nil {
panic("Dialer has nil NetMon")
}
logf = logger.WithPrefix(logf, "dns: ")
if goos == "" {
goos = runtime.GOOS
}
m := &Manager{
logf: logf,
resolver: resolver.New(logf, linkSel, dialer, knobs),
os: oscfg,
health: health,
knobs: knobs,
goos: goos,
}
// Rate limit our attempts to correct our DNS configuration.
limiter := rate.NewLimiter(1.0/5.0, 1)
// This will recompile the DNS config, which in turn will requery the system
// DNS settings. The recovery func should triggered only when we are missing
// upstream nameservers and require them to forward a query.
m.resolver.SetMissingUpstreamRecovery(func() {
m.mu.Lock()
defer m.mu.Unlock()
if m.config == nil {
return
}
if limiter.Allow() {
m.logf("DNS resolution failed due to missing upstream nameservers. Recompiling DNS configuration.")
m.setLocked(*m.config)
}
})
m.ctx, m.ctxCancel = context.WithCancel(context.Background())
m.logf("using %T", m.os)
return m
}
// Resolver returns the Manager's DNS Resolver.
func (m *Manager) Resolver() *resolver.Resolver { return m.resolver }
func (m *Manager) Set(cfg Config) error {
m.mu.Lock()
defer m.mu.Unlock()
return m.setLocked(cfg)
}
// Sets the DNS configuration.
// m.mu must be held
func (m *Manager) setLocked(cfg Config) error {
syncs.AssertLocked(&m.mu)
// On errors, the 'set' config is cleared.
m.config = nil
m.logf("Set: %v", logger.ArgWriter(func(w *bufio.Writer) {
cfg.WriteToBufioWriter(w)
}))
rcfg, ocfg, err := m.compileConfig(cfg)
if err != nil {
return err
}
m.logf("Resolvercfg: %v", logger.ArgWriter(func(w *bufio.Writer) {
rcfg.WriteToBufioWriter(w)
}))
m.logf("OScfg: %v", logger.ArgWriter(func(w *bufio.Writer) {
ocfg.WriteToBufioWriter(w)
}))
if err := m.resolver.SetConfig(rcfg); err != nil {
return err
}
if err := m.os.SetDNS(ocfg); err != nil {
m.health.SetDNSOSHealth(err)
return err
}
m.health.SetDNSOSHealth(nil)
m.config = &cfg
return nil
}
// compileHostEntries creates a list of single-label resolutions possible
// from the configured hosts and search domains.
// The entries are compiled in the order of the search domains, then the hosts.
// The returned list is sorted by the first hostname in each entry.
func compileHostEntries(cfg Config) (hosts []*HostEntry) {
didLabel := make(map[string]bool, len(cfg.Hosts))
hostsMap := make(map[netip.Addr]*HostEntry, len(cfg.Hosts))
for _, sd := range cfg.SearchDomains {
for h, ips := range cfg.Hosts {
if !sd.Contains(h) || h.NumLabels() != (sd.NumLabels()+1) {
continue
}
ipHosts := []string{string(h.WithTrailingDot())}
if label := dnsname.FirstLabel(string(h)); !didLabel[label] {
didLabel[label] = true
ipHosts = append(ipHosts, label)
}
for _, ip := range ips {
if cfg.OnlyIPv6 && ip.Is4() {
continue
}
if e := hostsMap[ip]; e != nil {
e.Hosts = append(e.Hosts, ipHosts...)
} else {
hostsMap[ip] = &HostEntry{
Addr: ip,
Hosts: ipHosts,
}
}
// Only add IPv4 or IPv6 per host, like we do in the resolver.
break
}
}
}
if len(hostsMap) == 0 {
return nil
}
hosts = xmaps.Values(hostsMap)
slices.SortFunc(hosts, func(a, b *HostEntry) int {
if len(a.Hosts) == 0 && len(b.Hosts) == 0 {
return 0
} else if len(a.Hosts) == 0 {
return -1
} else if len(b.Hosts) == 0 {
return 1
}
return strings.Compare(a.Hosts[0], b.Hosts[0])
})
return hosts
}
// compileConfig converts cfg into a quad-100 resolver configuration
// and an OS-level configuration.
func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig, err error) {
// The internal resolver always gets MagicDNS hosts and
// authoritative suffixes, even if we don't propagate MagicDNS to
// the OS.
rcfg.Hosts = cfg.Hosts
routes := map[dnsname.FQDN][]*dnstype.Resolver{} // assigned conditionally to rcfg.Routes below.
for suffix, resolvers := range cfg.Routes {
if len(resolvers) == 0 {
rcfg.LocalDomains = append(rcfg.LocalDomains, suffix)
} else {
routes[suffix] = resolvers
}
}
// Similarly, the OS always gets search paths.
ocfg.SearchDomains = cfg.SearchDomains
if m.goos == "windows" {
ocfg.Hosts = compileHostEntries(cfg)
}
// Deal with trivial configs first.
switch {
case !cfg.needsOSResolver():
// Set search domains, but nothing else. This also covers the
// case where cfg is entirely zero, in which case these
// configs clear all Tailscale DNS settings.
return rcfg, ocfg, nil
case cfg.hasDefaultIPResolversOnly() && !cfg.hasHostsWithoutSplitDNSRoutes():
// Trivial CorpDNS configuration, just override the OS resolver.
//
// If there are hosts (ExtraRecords) that are not covered by an existing
// SplitDNS route, then we don't go into this path so that we fall into
// the next case and send the extra record hosts queries through
// 100.100.100.100 instead where we can answer them.
//
// TODO: for OSes that support it, pass IP:port and DoH
// addresses directly to OS.
// https://github.com/tailscale/tailscale/issues/1666
ocfg.Nameservers = toIPsOnly(cfg.DefaultResolvers)
return rcfg, ocfg, nil
case cfg.hasDefaultResolvers():
// Default resolvers plus other stuff always ends up proxying
// through quad-100.
rcfg.Routes = routes
rcfg.Routes["."] = cfg.DefaultResolvers
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
return rcfg, ocfg, nil
}
// From this point on, we're figuring out split DNS
// configurations. The possible cases don't return directly any
// more, because as a final step we have to handle the case where
// the OS can't do split DNS.
// Workaround for
// https://github.com/tailscale/corp/issues/1662. Even though
// Windows natively supports split DNS, it only configures linux
// containers using whatever the primary is, and doesn't apply
// NRPT rules to DNS traffic coming from WSL.
//
// In order to make WSL work okay when the host Windows is using
// Tailscale, we need to set up quad-100 as a "full proxy"
// resolver, regardless of whether Windows itself can do split
// DNS. We still make Windows do split DNS itself when it can, but
// quad-100 will still have the full split configuration as well,
// and so can service WSL requests correctly.
//
// This bool is used in a couple of places below to implement this
// workaround.
isWindows := m.goos == "windows"
isApple := (m.goos == "darwin" || m.goos == "ios")
if len(cfg.singleResolverSet()) > 0 && m.os.SupportsSplitDNS() && !isWindows && !isApple {
// Split DNS configuration requested, where all split domains
// go to the same resolvers. We can let the OS do it.
ocfg.Nameservers = toIPsOnly(cfg.singleResolverSet())
ocfg.MatchDomains = cfg.matchDomains()
return rcfg, ocfg, nil
}
// Split DNS configuration with either multiple upstream routes,
// or routes + MagicDNS, or just MagicDNS, or on an OS that cannot
// split-DNS. Install a split config pointing at quad-100.
rcfg.Routes = routes
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
var baseCfg *OSConfig // base config; non-nil if/when known
// Even though Apple devices can do split DNS, they don't provide a way to
// selectively answer ExtraRecords, and ignore other DNS traffic. As a
// workaround, we read the existing default resolver configuration and use
// that as the forwarder for all DNS traffic that quad-100 doesn't handle.
if isApple || !m.os.SupportsSplitDNS() {
// If the OS can't do native split-dns, read out the underlying
// resolver config and blend it into our config.
cfg, err := m.os.GetBaseConfig()
if err == nil {
baseCfg = &cfg
} else if isApple && err == ErrGetBaseConfigNotSupported {
// This is currently (2022-10-13) expected on certain iOS and macOS
// builds.
} else {
m.health.SetDNSOSHealth(err)
return resolver.Config{}, OSConfig{}, err
}
}
if baseCfg == nil {
// If there was no base config, then we need to fallback to SplitDNS mode.
ocfg.MatchDomains = cfg.matchDomains()
} else {
// On iOS only (for now), check if all route names point to resources inside the tailnet.
// If so, we can set those names as MatchDomains to enable a split DNS configuration
// which will help preserve battery life.
// Because on iOS MatchDomains must equal SearchDomains, we cannot do this when
// we have any Routes outside the tailnet. Otherwise when app connectors are enabled,
// a query for 'work-laptop' might lead to search domain expansion, resolving
// as 'work-laptop.aws.com' for example.
if m.goos == "ios" && rcfg.RoutesRequireNoCustomResolvers() {
if !m.disableSplitDNSOptimization() {
for r := range rcfg.Routes {
ocfg.MatchDomains = append(ocfg.MatchDomains, r)
}
} else {
m.logf("iOS split DNS is disabled by nodeattr")
}
}
var defaultRoutes []*dnstype.Resolver
for _, ip := range baseCfg.Nameservers {
defaultRoutes = append(defaultRoutes, &dnstype.Resolver{Addr: ip.String()})
}
rcfg.Routes["."] = defaultRoutes
ocfg.SearchDomains = append(ocfg.SearchDomains, baseCfg.SearchDomains...)
}
return rcfg, ocfg, nil
}
func (m *Manager) disableSplitDNSOptimization() bool {
return m.knobs != nil && m.knobs.DisableSplitDNSWhenNoCustomResolvers.Load()
}
// toIPsOnly returns only the IP portion of dnstype.Resolver.
// Only safe to use if the resolvers slice has been cleared of
// DoH or custom-port entries with something like hasDefaultIPResolversOnly.
func toIPsOnly(resolvers []*dnstype.Resolver) (ret []netip.Addr) {
for _, r := range resolvers {
if ipp, ok := r.IPPort(); ok && ipp.Port() == 53 {
ret = append(ret, ipp.Addr())
}
}
return ret
}
// Query executes a DNS query received from the given address. The query is
// provided in bs as a wire-encoded DNS query without any transport header.
// This method is called for requests arriving over UDP and TCP.
//
// The "family" parameter should indicate what type of DNS query this is:
// either "tcp" or "udp".
func (m *Manager) Query(ctx context.Context, bs []byte, family string, from netip.AddrPort) ([]byte, error) {
select {
case <-m.ctx.Done():
return nil, net.ErrClosed
default:
// continue
}
if n := atomic.AddInt32(&m.activeQueriesAtomic, 1); n > maxActiveQueries {
atomic.AddInt32(&m.activeQueriesAtomic, -1)
metricDNSQueryErrorQueue.Add(1)
return nil, errFullQueue
}
defer atomic.AddInt32(&m.activeQueriesAtomic, -1)
return m.resolver.Query(ctx, bs, family, from)
}
const (
// RFC 7766 6.2 recommends connection reuse & request pipelining
// be undertaken, and the connection be closed by the server
// using an idle timeout on the order of seconds.
idleTimeoutTCP = 45 * time.Second
// The RFCs don't specify the max size of a TCP-based DNS query,
// but we want to keep this reasonable. Given payloads are typically
// much larger and all known client send a single query, I've arbitrarily
// chosen 4k.
maxReqSizeTCP = 4096
)
// dnsTCPSession services DNS requests sent over TCP.
type dnsTCPSession struct {
m *Manager
conn net.Conn
srcAddr netip.AddrPort
readClosing chan struct{}
responses chan []byte // DNS replies pending writing
ctx context.Context
closeCtx context.CancelFunc
}
func (s *dnsTCPSession) handleWrites() {
defer s.conn.Close()
defer s.closeCtx()
// NOTE(andrew): we explicitly do not close the 'responses' channel
// when this function exits. If we hit an error and return, we could
// still have outstanding 'handleQuery' goroutines running, and if we
// closed this channel they'd end up trying to send on a closed channel
// when they finish.
//
// Because we call closeCtx, those goroutines will not hang since they
// select on <-s.ctx.Done() as well as s.responses.
for {
select {
case <-s.readClosing:
return // connection closed or timeout, teardown time
case resp := <-s.responses:
s.conn.SetWriteDeadline(time.Now().Add(idleTimeoutTCP))
if err := binary.Write(s.conn, binary.BigEndian, uint16(len(resp))); err != nil {
s.m.logf("tcp write (len): %v", err)
return
}
if _, err := s.conn.Write(resp); err != nil {
s.m.logf("tcp write (response): %v", err)
return
}
}
}
}
func (s *dnsTCPSession) handleQuery(q []byte) {
resp, err := s.m.Query(s.ctx, q, "tcp", s.srcAddr)
if err != nil {
s.m.logf("tcp query: %v", err)
return
}
// See note in handleWrites (above) regarding this select{}
select {
case <-s.ctx.Done():
case s.responses <- resp:
}
}
func (s *dnsTCPSession) handleReads() {
defer s.conn.Close()
defer close(s.readClosing)
for {
select {
case <-s.ctx.Done():
return
default:
s.conn.SetReadDeadline(time.Now().Add(idleTimeoutTCP))
var reqLen uint16
if err := binary.Read(s.conn, binary.BigEndian, &reqLen); err != nil {
if err == io.EOF || err == io.ErrClosedPipe {
return // connection closed nominally, we gucci
}
s.m.logf("tcp read (len): %v", err)
return
}
if int(reqLen) > maxReqSizeTCP {
s.m.logf("tcp request too large (%d > %d)", reqLen, maxReqSizeTCP)
return
}
buf := make([]byte, int(reqLen))
if _, err := io.ReadFull(s.conn, buf); err != nil {
s.m.logf("tcp read (payload): %v", err)
return
}
select {
case <-s.ctx.Done():
return
default:
// NOTE: by kicking off the query handling in a
// new goroutine, it is possible that we'll
// deliver responses out-of-order. This is
// explicitly allowed by RFC7766, Section
// 6.2.1.1 ("Query Pipelining").
go s.handleQuery(buf)
}
}
}
}
// HandleTCPConn implements magicDNS over TCP, taking a connection and
// servicing DNS requests sent down it.
func (m *Manager) HandleTCPConn(conn net.Conn, srcAddr netip.AddrPort) {
s := dnsTCPSession{
m: m,
conn: conn,
srcAddr: srcAddr,
responses: make(chan []byte),
readClosing: make(chan struct{}),
}
s.ctx, s.closeCtx = context.WithCancel(m.ctx)
go s.handleReads()
s.handleWrites()
}
func (m *Manager) Down() error {
m.ctxCancel()
if err := m.os.Close(); err != nil {
return err
}
m.resolver.Close()
return nil
}
func (m *Manager) FlushCaches() error {
return flushCaches()
}
// CleanUp restores the system DNS configuration to its original state
// in case the Tailscale daemon terminated without closing the router.
// No other state needs to be instantiated before this runs.
func CleanUp(logf logger.Logf, netMon *netmon.Monitor, interfaceName string) {
oscfg, err := NewOSConfigurator(logf, nil, nil, interfaceName)
if err != nil {
logf("creating dns cleanup: %v", err)
return
}
d := &tsdial.Dialer{Logf: logf}
d.SetNetMon(netMon)
dns := NewManager(logf, oscfg, nil, d, nil, nil, runtime.GOOS)
if err := dns.Down(); err != nil {
logf("dns down: %v", err)
}
}
var (
metricDNSQueryErrorQueue = clientmetric.NewCounter("dns_query_local_error_queue")
)