570 lines
16 KiB
Go
570 lines
16 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package dns
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/binary"
|
|
"errors"
|
|
"io"
|
|
"net"
|
|
"net/netip"
|
|
"runtime"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"golang.org/x/exp/slices"
|
|
"tailscale.com/health"
|
|
"tailscale.com/net/dns/resolver"
|
|
"tailscale.com/net/packet"
|
|
"tailscale.com/net/tsaddr"
|
|
"tailscale.com/net/tsdial"
|
|
"tailscale.com/net/tstun"
|
|
"tailscale.com/types/dnstype"
|
|
"tailscale.com/types/ipproto"
|
|
"tailscale.com/types/logger"
|
|
"tailscale.com/util/clientmetric"
|
|
"tailscale.com/util/dnsname"
|
|
"tailscale.com/wgengine/monitor"
|
|
)
|
|
|
|
var (
|
|
magicDNSIP = tsaddr.TailscaleServiceIP()
|
|
magicDNSIPv6 = tsaddr.TailscaleServiceIPv6()
|
|
)
|
|
|
|
var (
|
|
errFullQueue = errors.New("request queue full")
|
|
)
|
|
|
|
// maxActiveQueries returns the maximal number of DNS requests that be
|
|
// can running.
|
|
// If EnqueueRequest is called when this many requests are already pending,
|
|
// the request will be dropped to avoid blocking the caller.
|
|
func maxActiveQueries() int32 {
|
|
if runtime.GOOS == "ios" {
|
|
// For memory paranoia reasons on iOS, match the
|
|
// historical Tailscale 1.x..1.8 behavior for now
|
|
// (just before the 1.10 release).
|
|
return 64
|
|
}
|
|
// But for other platforms, allow more burstiness:
|
|
return 256
|
|
}
|
|
|
|
// We use file-ignore below instead of ignore because on some platforms,
|
|
// the lint exception is necessary and on others it is not,
|
|
// and plain ignore complains if the exception is unnecessary.
|
|
|
|
// reconfigTimeout is the time interval within which Manager.{Up,Down} should complete.
|
|
//
|
|
// This is particularly useful because certain conditions can cause indefinite hangs
|
|
// (such as improper dbus auth followed by contextless dbus.Object.Call).
|
|
// Such operations should be wrapped in a timeout context.
|
|
const reconfigTimeout = time.Second
|
|
|
|
type response struct {
|
|
pkt []byte
|
|
to netip.AddrPort // response destination (request source)
|
|
}
|
|
|
|
// Manager manages system DNS settings.
|
|
type Manager struct {
|
|
logf logger.Logf
|
|
|
|
// When netstack is not used, Manager implements magic DNS.
|
|
// In this case, responses tracks completed DNS requests
|
|
// which need a response, and NextPacket() synthesizes a
|
|
// fake IP+UDP header to finish assembling the response.
|
|
//
|
|
// TODO(tom): Rip out once all platforms use netstack.
|
|
responses chan response
|
|
activeQueriesAtomic int32
|
|
|
|
ctx context.Context // good until Down
|
|
ctxCancel context.CancelFunc // closes ctx
|
|
|
|
resolver *resolver.Resolver
|
|
os OSConfigurator
|
|
}
|
|
|
|
// NewManagers created a new manager from the given config.
|
|
func NewManager(logf logger.Logf, oscfg OSConfigurator, linkMon *monitor.Mon, dialer *tsdial.Dialer, linkSel resolver.ForwardLinkSelector) *Manager {
|
|
if dialer == nil {
|
|
panic("nil Dialer")
|
|
}
|
|
logf = logger.WithPrefix(logf, "dns: ")
|
|
m := &Manager{
|
|
logf: logf,
|
|
resolver: resolver.New(logf, linkMon, linkSel, dialer),
|
|
os: oscfg,
|
|
responses: make(chan response),
|
|
}
|
|
m.ctx, m.ctxCancel = context.WithCancel(context.Background())
|
|
m.logf("using %T", m.os)
|
|
return m
|
|
}
|
|
|
|
// Resolver returns the Manager's DNS Resolver.
|
|
func (m *Manager) Resolver() *resolver.Resolver { return m.resolver }
|
|
|
|
func (m *Manager) Set(cfg Config) error {
|
|
m.logf("Set: %v", logger.ArgWriter(func(w *bufio.Writer) {
|
|
cfg.WriteToBufioWriter(w)
|
|
}))
|
|
|
|
rcfg, ocfg, err := m.compileConfig(cfg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.logf("Resolvercfg: %v", logger.ArgWriter(func(w *bufio.Writer) {
|
|
rcfg.WriteToBufioWriter(w)
|
|
}))
|
|
m.logf("OScfg: %+v", ocfg)
|
|
|
|
if err := m.resolver.SetConfig(rcfg); err != nil {
|
|
return err
|
|
}
|
|
if err := m.os.SetDNS(ocfg); err != nil {
|
|
health.SetDNSOSHealth(err)
|
|
return err
|
|
}
|
|
health.SetDNSOSHealth(nil)
|
|
|
|
return nil
|
|
}
|
|
|
|
// compileHostEntries creates a list of single-label resolutions possible
|
|
// from the configured hosts and search domains.
|
|
// The entries are compiled in the order of the search domains, then the hosts.
|
|
// The returned list is sorted by the first hostname in each entry.
|
|
func compileHostEntries(cfg Config) (hosts []*HostEntry) {
|
|
didLabel := make(map[string]bool, len(cfg.Hosts))
|
|
for _, sd := range cfg.SearchDomains {
|
|
for h, ips := range cfg.Hosts {
|
|
if !sd.Contains(h) || h.NumLabels() != (sd.NumLabels()+1) {
|
|
continue
|
|
}
|
|
ipHosts := []string{string(h.WithTrailingDot())}
|
|
if label := dnsname.FirstLabel(string(h)); !didLabel[label] {
|
|
didLabel[label] = true
|
|
ipHosts = append(ipHosts, label)
|
|
}
|
|
for _, ip := range ips {
|
|
if cfg.OnlyIPv6 && ip.Is4() {
|
|
continue
|
|
}
|
|
hosts = append(hosts, &HostEntry{
|
|
Addr: ip,
|
|
Hosts: ipHosts,
|
|
})
|
|
// Only add IPv4 or IPv6 per host, like we do in the resolver.
|
|
break
|
|
}
|
|
}
|
|
}
|
|
slices.SortFunc(hosts, func(a, b *HostEntry) bool {
|
|
if len(a.Hosts) == 0 {
|
|
return false
|
|
}
|
|
if len(b.Hosts) == 0 {
|
|
return true
|
|
}
|
|
return a.Hosts[0] < b.Hosts[0]
|
|
})
|
|
return hosts
|
|
}
|
|
|
|
// compileConfig converts cfg into a quad-100 resolver configuration
|
|
// and an OS-level configuration.
|
|
func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig, err error) {
|
|
// The internal resolver always gets MagicDNS hosts and
|
|
// authoritative suffixes, even if we don't propagate MagicDNS to
|
|
// the OS.
|
|
rcfg.Hosts = cfg.Hosts
|
|
routes := map[dnsname.FQDN][]*dnstype.Resolver{} // assigned conditionally to rcfg.Routes below.
|
|
for suffix, resolvers := range cfg.Routes {
|
|
if len(resolvers) == 0 {
|
|
rcfg.LocalDomains = append(rcfg.LocalDomains, suffix)
|
|
} else {
|
|
routes[suffix] = resolvers
|
|
}
|
|
}
|
|
|
|
// Similarly, the OS always gets search paths.
|
|
ocfg.SearchDomains = cfg.SearchDomains
|
|
if runtime.GOOS == "windows" {
|
|
ocfg.Hosts = compileHostEntries(cfg)
|
|
}
|
|
|
|
// Deal with trivial configs first.
|
|
switch {
|
|
case !cfg.needsOSResolver():
|
|
// Set search domains, but nothing else. This also covers the
|
|
// case where cfg is entirely zero, in which case these
|
|
// configs clear all Tailscale DNS settings.
|
|
return rcfg, ocfg, nil
|
|
case cfg.hasDefaultIPResolversOnly() && !cfg.hasHostsWithoutSplitDNSRoutes():
|
|
// Trivial CorpDNS configuration, just override the OS resolver.
|
|
//
|
|
// If there are hosts (ExtraRecords) that are not covered by an existing
|
|
// SplitDNS route, then we don't go into this path so that we fall into
|
|
// the next case and send the extra record hosts queries through
|
|
// 100.100.100.100 instead where we can answer them.
|
|
//
|
|
// TODO: for OSes that support it, pass IP:port and DoH
|
|
// addresses directly to OS.
|
|
// https://github.com/tailscale/tailscale/issues/1666
|
|
ocfg.Nameservers = toIPsOnly(cfg.DefaultResolvers)
|
|
return rcfg, ocfg, nil
|
|
case cfg.hasDefaultResolvers():
|
|
// Default resolvers plus other stuff always ends up proxying
|
|
// through quad-100.
|
|
rcfg.Routes = routes
|
|
rcfg.Routes["."] = cfg.DefaultResolvers
|
|
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
|
|
return rcfg, ocfg, nil
|
|
}
|
|
|
|
// From this point on, we're figuring out split DNS
|
|
// configurations. The possible cases don't return directly any
|
|
// more, because as a final step we have to handle the case where
|
|
// the OS can't do split DNS.
|
|
|
|
// Workaround for
|
|
// https://github.com/tailscale/corp/issues/1662. Even though
|
|
// Windows natively supports split DNS, it only configures linux
|
|
// containers using whatever the primary is, and doesn't apply
|
|
// NRPT rules to DNS traffic coming from WSL.
|
|
//
|
|
// In order to make WSL work okay when the host Windows is using
|
|
// Tailscale, we need to set up quad-100 as a "full proxy"
|
|
// resolver, regardless of whether Windows itself can do split
|
|
// DNS. We still make Windows do split DNS itself when it can, but
|
|
// quad-100 will still have the full split configuration as well,
|
|
// and so can service WSL requests correctly.
|
|
//
|
|
// This bool is used in a couple of places below to implement this
|
|
// workaround.
|
|
isWindows := runtime.GOOS == "windows"
|
|
if cfg.singleResolverSet() != nil && m.os.SupportsSplitDNS() && !isWindows {
|
|
// Split DNS configuration requested, where all split domains
|
|
// go to the same resolvers. We can let the OS do it.
|
|
ocfg.Nameservers = toIPsOnly(cfg.singleResolverSet())
|
|
ocfg.MatchDomains = cfg.matchDomains()
|
|
return rcfg, ocfg, nil
|
|
}
|
|
|
|
// Split DNS configuration with either multiple upstream routes,
|
|
// or routes + MagicDNS, or just MagicDNS, or on an OS that cannot
|
|
// split-DNS. Install a split config pointing at quad-100.
|
|
rcfg.Routes = routes
|
|
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
|
|
|
|
var baseCfg *OSConfig // base config; non-nil if/when known
|
|
|
|
// Even though Apple devices can do split DNS, they don't provide a way to
|
|
// selectively answer ExtraRecords, and ignore other DNS traffic. As a
|
|
// workaround, we read the existing default resolver configuration and use
|
|
// that as the forwarder for all DNS traffic that quad-100 doesn't handle.
|
|
const isApple = runtime.GOOS == "darwin" || runtime.GOOS == "ios"
|
|
|
|
if isApple || !m.os.SupportsSplitDNS() {
|
|
// If the OS can't do native split-dns, read out the underlying
|
|
// resolver config and blend it into our config.
|
|
cfg, err := m.os.GetBaseConfig()
|
|
if err == nil {
|
|
baseCfg = &cfg
|
|
} else if isApple && err == ErrGetBaseConfigNotSupported {
|
|
// This is currently (2022-10-13) expected on certain iOS and macOS
|
|
// builds.
|
|
} else {
|
|
health.SetDNSOSHealth(err)
|
|
return resolver.Config{}, OSConfig{}, err
|
|
}
|
|
}
|
|
|
|
if baseCfg == nil || isApple && len(baseCfg.Nameservers) == 0 {
|
|
// If there was no base config, or if we're on Apple and the base
|
|
// config is empty, then we need to fallback to SplitDNS mode.
|
|
ocfg.MatchDomains = cfg.matchDomains()
|
|
} else {
|
|
var defaultRoutes []*dnstype.Resolver
|
|
for _, ip := range baseCfg.Nameservers {
|
|
defaultRoutes = append(defaultRoutes, &dnstype.Resolver{Addr: ip.String()})
|
|
}
|
|
rcfg.Routes["."] = defaultRoutes
|
|
ocfg.SearchDomains = append(ocfg.SearchDomains, baseCfg.SearchDomains...)
|
|
}
|
|
|
|
return rcfg, ocfg, nil
|
|
}
|
|
|
|
// toIPsOnly returns only the IP portion of dnstype.Resolver.
|
|
// Only safe to use if the resolvers slice has been cleared of
|
|
// DoH or custom-port entries with something like hasDefaultIPResolversOnly.
|
|
func toIPsOnly(resolvers []*dnstype.Resolver) (ret []netip.Addr) {
|
|
for _, r := range resolvers {
|
|
if ipp, ok := r.IPPort(); ok && ipp.Port() == 53 {
|
|
ret = append(ret, ipp.Addr())
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// EnqueuePacket is the legacy path for handling magic DNS traffic, and is
|
|
// called with a DNS request payload.
|
|
//
|
|
// TODO(tom): Rip out once all platforms use netstack.
|
|
func (m *Manager) EnqueuePacket(bs []byte, proto ipproto.Proto, from, to netip.AddrPort) error {
|
|
if to.Port() != 53 || proto != ipproto.UDP {
|
|
return nil
|
|
}
|
|
|
|
if n := atomic.AddInt32(&m.activeQueriesAtomic, 1); n > maxActiveQueries() {
|
|
atomic.AddInt32(&m.activeQueriesAtomic, -1)
|
|
metricDNSQueryErrorQueue.Add(1)
|
|
return errFullQueue
|
|
}
|
|
|
|
go func() {
|
|
resp, err := m.resolver.Query(m.ctx, bs, from)
|
|
if err != nil {
|
|
atomic.AddInt32(&m.activeQueriesAtomic, -1)
|
|
m.logf("dns query: %v", err)
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-m.ctx.Done():
|
|
return
|
|
case m.responses <- response{resp, from}:
|
|
}
|
|
}()
|
|
return nil
|
|
}
|
|
|
|
// NextPacket is the legacy path for obtaining DNS results in response to
|
|
// magic DNS queries. It blocks until a response is available.
|
|
//
|
|
// TODO(tom): Rip out once all platforms use netstack.
|
|
func (m *Manager) NextPacket() ([]byte, error) {
|
|
var resp response
|
|
select {
|
|
case <-m.ctx.Done():
|
|
return nil, net.ErrClosed
|
|
case resp = <-m.responses:
|
|
// continue
|
|
}
|
|
|
|
// Unused space is needed further down the stack. To avoid extra
|
|
// allocations/copying later on, we allocate such space here.
|
|
const offset = tstun.PacketStartOffset
|
|
|
|
var buf []byte
|
|
switch {
|
|
case resp.to.Addr().Is4():
|
|
h := packet.UDP4Header{
|
|
IP4Header: packet.IP4Header{
|
|
Src: magicDNSIP,
|
|
Dst: resp.to.Addr(),
|
|
},
|
|
SrcPort: 53,
|
|
DstPort: resp.to.Port(),
|
|
}
|
|
hlen := h.Len()
|
|
buf = make([]byte, offset+hlen+len(resp.pkt))
|
|
copy(buf[offset+hlen:], resp.pkt)
|
|
h.Marshal(buf[offset:])
|
|
case resp.to.Addr().Is6():
|
|
h := packet.UDP6Header{
|
|
IP6Header: packet.IP6Header{
|
|
Src: magicDNSIPv6,
|
|
Dst: resp.to.Addr(),
|
|
},
|
|
SrcPort: 53,
|
|
DstPort: resp.to.Port(),
|
|
}
|
|
hlen := h.Len()
|
|
buf = make([]byte, offset+hlen+len(resp.pkt))
|
|
copy(buf[offset+hlen:], resp.pkt)
|
|
h.Marshal(buf[offset:])
|
|
}
|
|
|
|
atomic.AddInt32(&m.activeQueriesAtomic, -1)
|
|
return buf, nil
|
|
}
|
|
|
|
// Query executes a DNS query received from the given address. The query is
|
|
// provided in bs as a wire-encoded DNS query without any transport header.
|
|
// This method is called for requests arriving over UDP and TCP.
|
|
func (m *Manager) Query(ctx context.Context, bs []byte, from netip.AddrPort) ([]byte, error) {
|
|
select {
|
|
case <-m.ctx.Done():
|
|
return nil, net.ErrClosed
|
|
default:
|
|
// continue
|
|
}
|
|
|
|
if n := atomic.AddInt32(&m.activeQueriesAtomic, 1); n > maxActiveQueries() {
|
|
atomic.AddInt32(&m.activeQueriesAtomic, -1)
|
|
metricDNSQueryErrorQueue.Add(1)
|
|
return nil, errFullQueue
|
|
}
|
|
defer atomic.AddInt32(&m.activeQueriesAtomic, -1)
|
|
return m.resolver.Query(ctx, bs, from)
|
|
}
|
|
|
|
const (
|
|
// RFC 7766 6.2 recommends connection reuse & request pipelining
|
|
// be undertaken, and the connection be closed by the server
|
|
// using an idle timeout on the order of seconds.
|
|
idleTimeoutTCP = 45 * time.Second
|
|
// The RFCs don't specify the max size of a TCP-based DNS query,
|
|
// but we want to keep this reasonable. Given payloads are typically
|
|
// much larger and all known client send a single query, I've arbitrarily
|
|
// chosen 2k.
|
|
maxReqSizeTCP = 2048
|
|
)
|
|
|
|
// dnsTCPSession services DNS requests sent over TCP.
|
|
type dnsTCPSession struct {
|
|
m *Manager
|
|
|
|
conn net.Conn
|
|
srcAddr netip.AddrPort
|
|
|
|
readClosing chan struct{}
|
|
responses chan []byte // DNS replies pending writing
|
|
|
|
ctx context.Context
|
|
closeCtx context.CancelFunc
|
|
}
|
|
|
|
func (s *dnsTCPSession) handleWrites() {
|
|
defer s.conn.Close()
|
|
defer close(s.responses)
|
|
defer s.closeCtx()
|
|
|
|
for {
|
|
select {
|
|
case <-s.readClosing:
|
|
return // connection closed or timeout, teardown time
|
|
|
|
case resp := <-s.responses:
|
|
s.conn.SetWriteDeadline(time.Now().Add(idleTimeoutTCP))
|
|
if err := binary.Write(s.conn, binary.BigEndian, uint16(len(resp))); err != nil {
|
|
s.m.logf("tcp write (len): %v", err)
|
|
return
|
|
}
|
|
if _, err := s.conn.Write(resp); err != nil {
|
|
s.m.logf("tcp write (response): %v", err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *dnsTCPSession) handleQuery(q []byte) {
|
|
resp, err := s.m.Query(s.ctx, q, s.srcAddr)
|
|
if err != nil {
|
|
s.m.logf("tcp query: %v", err)
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-s.ctx.Done():
|
|
case s.responses <- resp:
|
|
}
|
|
}
|
|
|
|
func (s *dnsTCPSession) handleReads() {
|
|
defer close(s.readClosing)
|
|
|
|
for {
|
|
select {
|
|
case <-s.ctx.Done():
|
|
return
|
|
|
|
default:
|
|
s.conn.SetReadDeadline(time.Now().Add(idleTimeoutTCP))
|
|
var reqLen uint16
|
|
if err := binary.Read(s.conn, binary.BigEndian, &reqLen); err != nil {
|
|
if err == io.EOF || err == io.ErrClosedPipe {
|
|
return // connection closed nominally, we gucci
|
|
}
|
|
s.m.logf("tcp read (len): %v", err)
|
|
return
|
|
}
|
|
if int(reqLen) > maxReqSizeTCP {
|
|
s.m.logf("tcp request too large (%d > %d)", reqLen, maxReqSizeTCP)
|
|
return
|
|
}
|
|
|
|
buf := make([]byte, int(reqLen))
|
|
if _, err := io.ReadFull(s.conn, buf); err != nil {
|
|
s.m.logf("tcp read (payload): %v", err)
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-s.ctx.Done():
|
|
return
|
|
default:
|
|
go s.handleQuery(buf)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// HandleTCPConn implements magicDNS over TCP, taking a connection and
|
|
// servicing DNS requests sent down it.
|
|
func (m *Manager) HandleTCPConn(conn net.Conn, srcAddr netip.AddrPort) {
|
|
s := dnsTCPSession{
|
|
m: m,
|
|
conn: conn,
|
|
srcAddr: srcAddr,
|
|
responses: make(chan []byte),
|
|
readClosing: make(chan struct{}),
|
|
}
|
|
s.ctx, s.closeCtx = context.WithCancel(m.ctx)
|
|
go s.handleReads()
|
|
s.handleWrites()
|
|
}
|
|
|
|
func (m *Manager) Down() error {
|
|
m.ctxCancel()
|
|
if err := m.os.Close(); err != nil {
|
|
return err
|
|
}
|
|
m.resolver.Close()
|
|
return nil
|
|
}
|
|
|
|
func (m *Manager) FlushCaches() error {
|
|
return flushCaches()
|
|
}
|
|
|
|
// Cleanup restores the system DNS configuration to its original state
|
|
// in case the Tailscale daemon terminated without closing the router.
|
|
// No other state needs to be instantiated before this runs.
|
|
func Cleanup(logf logger.Logf, interfaceName string) {
|
|
oscfg, err := NewOSConfigurator(logf, interfaceName)
|
|
if err != nil {
|
|
logf("creating dns cleanup: %v", err)
|
|
return
|
|
}
|
|
dns := NewManager(logf, oscfg, nil, &tsdial.Dialer{Logf: logf}, nil)
|
|
if err := dns.Down(); err != nil {
|
|
logf("dns down: %v", err)
|
|
}
|
|
}
|
|
|
|
var (
|
|
metricDNSQueryErrorQueue = clientmetric.NewCounter("dns_query_local_error_queue")
|
|
)
|