tailscale/net/dns/resolver/forwarder.go

526 lines
13 KiB
Go

// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package resolver
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"math/rand"
"net"
"sync"
"syscall"
"time"
dns "golang.org/x/net/dns/dnsmessage"
"inet.af/netaddr"
"tailscale.com/logtail/backoff"
"tailscale.com/types/logger"
"tailscale.com/util/dnsname"
)
// headerBytes is the number of bytes in a DNS message header.
const headerBytes = 12
// connCount is the number of UDP connections to use for forwarding.
const connCount = 32
const (
// cleanupInterval is the interval between purged of timed-out entries from txMap.
cleanupInterval = 30 * time.Second
// responseTimeout is the maximal amount of time to wait for a DNS response.
responseTimeout = 5 * time.Second
)
var errNoUpstreams = errors.New("upstream nameservers not set")
type forwardingRecord struct {
src netaddr.IPPort
createdAt time.Time
}
// txid identifies a DNS transaction.
//
// As the standard DNS Request ID is only 16 bits, we extend it:
// the lower 32 bits are the zero-extended bits of the DNS Request ID;
// the upper 32 bits are the CRC32 checksum of the first question in the request.
// This makes probability of txid collision negligible.
type txid uint64
// getTxID computes the txid of the given DNS packet.
func getTxID(packet []byte) txid {
if len(packet) < headerBytes {
return 0
}
dnsid := binary.BigEndian.Uint16(packet[0:2])
qcount := binary.BigEndian.Uint16(packet[4:6])
if qcount == 0 {
return txid(dnsid)
}
offset := headerBytes
for i := uint16(0); i < qcount; i++ {
// Note: this relies on the fact that names are not compressed in questions,
// so they are guaranteed to end with a NUL byte.
//
// Justification:
// RFC 1035 doesn't seem to explicitly prohibit compressing names in questions,
// but this is exceedingly unlikely to be done in practice. A DNS request
// with multiple questions is ill-defined (which questions do the header flags apply to?)
// and a single question would have to contain a pointer to an *answer*,
// which would be excessively smart, pointless (an answer can just as well refer to the question)
// and perhaps even prohibited: a draft RFC (draft-ietf-dnsind-local-compression-05) states:
//
// > It is important that these pointers always point backwards.
//
// This is said in summarizing RFC 1035, although that phrase does not appear in the original RFC.
// Additionally, (https://cr.yp.to/djbdns/notes.html) states:
//
// > The precise rule is that a name can be compressed if it is a response owner name,
// > the name in NS data, the name in CNAME data, the name in PTR data, the name in MX data,
// > or one of the names in SOA data.
namebytes := bytes.IndexByte(packet[offset:], 0)
// ... | name | NUL | type | class
// ?? 1 2 2
offset = offset + namebytes + 5
if len(packet) < offset {
// Corrupt packet; don't crash.
return txid(dnsid)
}
}
hash := crc32.ChecksumIEEE(packet[headerBytes:offset])
return (txid(hash) << 32) | txid(dnsid)
}
type route struct {
suffix dnsname.FQDN
resolvers []netaddr.IPPort
}
// forwarder forwards DNS packets to a number of upstream nameservers.
type forwarder struct {
logf logger.Logf
// responses is a channel by which responses are returned.
responses chan packet
// closed signals all goroutines to stop.
closed chan struct{}
// wg signals when all goroutines have stopped.
wg sync.WaitGroup
// conns are the UDP connections used for forwarding.
// A random one is selected for each request, regardless of the target upstream.
conns []*fwdConn
mu sync.Mutex
// routes are per-suffix resolvers to use.
routes []route // most specific routes first
txMap map[txid]forwardingRecord // txids to in-flight requests
}
func init() {
rand.Seed(time.Now().UnixNano())
}
func newForwarder(logf logger.Logf, responses chan packet) *forwarder {
ret := &forwarder{
logf: logger.WithPrefix(logf, "forward: "),
responses: responses,
closed: make(chan struct{}),
conns: make([]*fwdConn, connCount),
txMap: make(map[txid]forwardingRecord),
}
ret.wg.Add(connCount + 1)
for idx := range ret.conns {
ret.conns[idx] = newFwdConn(ret.logf, idx)
go ret.recv(ret.conns[idx])
}
go ret.cleanMap()
return ret
}
func (f *forwarder) Close() {
select {
case <-f.closed:
return
default:
// continue
}
close(f.closed)
for _, conn := range f.conns {
conn.close()
}
f.wg.Wait()
}
func (f *forwarder) rebindFromNetworkChange() {
for _, c := range f.conns {
c.mu.Lock()
c.reconnectLocked()
c.mu.Unlock()
}
}
func (f *forwarder) setRoutes(routes []route) {
f.mu.Lock()
f.routes = routes
f.mu.Unlock()
}
// send sends packet to dst. It is best effort.
func (f *forwarder) send(packet []byte, dst netaddr.IPPort) {
connIdx := rand.Intn(connCount)
conn := f.conns[connIdx]
conn.send(packet, dst)
}
func (f *forwarder) recv(conn *fwdConn) {
defer f.wg.Done()
for {
select {
case <-f.closed:
return
default:
}
// The 1 extra byte is to detect packet truncation.
out := make([]byte, maxResponseBytes+1)
n := conn.read(out)
var truncated bool
if n > maxResponseBytes {
n = maxResponseBytes
truncated = true
}
if n == 0 {
continue
}
if n < headerBytes {
f.logf("recv: packet too small (%d bytes)", n)
}
out = out[:n]
txid := getTxID(out)
if truncated {
const dnsFlagTruncated = 0x200
flags := binary.BigEndian.Uint16(out[2:4])
flags |= dnsFlagTruncated
binary.BigEndian.PutUint16(out[2:4], flags)
// TODO(#2067): Remove any incomplete records? RFC 1035 section 6.2
// states that truncation should head drop so that the authority
// section can be preserved if possible. However, the UDP read with
// a too-small buffer has already dropped the end, so that's the
// best we can do.
}
f.mu.Lock()
record, found := f.txMap[txid]
// At most one nameserver will return a response:
// the first one to do so will delete txid from the map.
if !found {
f.mu.Unlock()
continue
}
delete(f.txMap, txid)
f.mu.Unlock()
pkt := packet{out, record.src}
select {
case <-f.closed:
return
case f.responses <- pkt:
// continue
}
}
}
// cleanMap periodically deletes timed-out forwarding records from f.txMap to bound growth.
func (f *forwarder) cleanMap() {
defer f.wg.Done()
t := time.NewTicker(cleanupInterval)
defer t.Stop()
var now time.Time
for {
select {
case <-f.closed:
return
case now = <-t.C:
// continue
}
f.mu.Lock()
for k, v := range f.txMap {
if now.Sub(v.createdAt) > responseTimeout {
delete(f.txMap, k)
}
}
f.mu.Unlock()
}
}
// forward forwards the query to all upstream nameservers and returns the first response.
func (f *forwarder) forward(query packet) error {
domain, err := nameFromQuery(query.bs)
if err != nil {
return err
}
txid := getTxID(query.bs)
f.mu.Lock()
routes := f.routes
f.mu.Unlock()
var resolvers []netaddr.IPPort
for _, route := range routes {
if route.suffix != "." && !route.suffix.Contains(domain) {
continue
}
resolvers = route.resolvers
break
}
if len(resolvers) == 0 {
return errNoUpstreams
}
f.mu.Lock()
f.txMap[txid] = forwardingRecord{
src: query.addr,
createdAt: time.Now(),
}
f.mu.Unlock()
// TODO(#2066): EDNS size clamping
for _, resolver := range resolvers {
f.send(query.bs, resolver)
}
return nil
}
// A fwdConn manages a single connection used to forward DNS requests.
// Net link changes can cause a *net.UDPConn to become permanently unusable, particularly on macOS.
// fwdConn detects such situations and transparently creates new connections.
type fwdConn struct {
// logf allows a fwdConn to log.
logf logger.Logf
// change allows calls to read to block until a the network connection has been replaced.
change *sync.Cond
// mu protects fields that follow it; it is also change's Locker.
mu sync.Mutex
// closed tracks whether fwdConn has been permanently closed.
closed bool
// conn is the current active connection.
conn net.PacketConn
}
func newFwdConn(logf logger.Logf, idx int) *fwdConn {
c := new(fwdConn)
c.logf = logger.WithPrefix(logf, fmt.Sprintf("fwdConn %d: ", idx))
c.change = sync.NewCond(&c.mu)
// c.conn is created lazily in send
return c
}
// send sends packet to dst using c's connection.
// It is best effort. It is UDP, after all. Failures are logged.
func (c *fwdConn) send(packet []byte, dst netaddr.IPPort) {
var b *backoff.Backoff // lazily initialized, since it is not needed in the common case
backOff := func(err error) {
if b == nil {
b = backoff.NewBackoff("dns-fwdConn-send", c.logf, 30*time.Second)
}
b.BackOff(context.Background(), err)
}
for {
// Gather the current connection.
// We can't hold the lock while we call WriteTo.
c.mu.Lock()
conn := c.conn
closed := c.closed
if closed {
c.mu.Unlock()
return
}
if conn == nil {
c.reconnectLocked()
c.mu.Unlock()
continue
}
c.mu.Unlock()
_, err := conn.WriteTo(packet, dst.UDPAddr())
if err == nil {
// Success
return
}
if errors.Is(err, net.ErrClosed) {
// We intentionally closed this connection.
// It has been replaced by a new connection. Try again.
continue
}
// Something else went wrong.
// We have three choices here: try again, give up, or create a new connection.
var opErr *net.OpError
if !errors.As(err, &opErr) {
// Weird. All errors from the net package should be *net.OpError. Bail.
c.logf("send: non-*net.OpErr %v (%T)", err, err)
return
}
if opErr.Temporary() || opErr.Timeout() {
// I doubt that either of these can happen (this is UDP),
// but go ahead and try again.
backOff(err)
continue
}
if errors.Is(err, syscall.EHOSTUNREACH) {
// "No route to host." The network stack is fine, but
// can't talk to this destination. Not much we can do
// about that, don't spam logs.
return
}
if networkIsDown(err) {
// Fail.
c.logf("send: network is down")
return
}
if networkIsUnreachable(err) {
// This can be caused by a link change.
// Replace the existing connection with a new one.
c.mu.Lock()
// It's possible that multiple senders discovered simultaneously
// that the network is unreachable. Avoid reconnecting multiple times:
// Only reconnect if the current connection is the one that we
// discovered to be problematic.
if c.conn == conn {
backOff(err)
c.reconnectLocked()
}
c.mu.Unlock()
// Try again with our new network connection.
continue
}
// Unrecognized error. Fail.
c.logf("send: unrecognized error: %v", err)
return
}
}
// read waits for a response from c's connection.
// It returns the number of bytes read, which may be 0
// in case of an error or a closed connection.
func (c *fwdConn) read(out []byte) int {
for {
// Gather the current connection.
// We can't hold the lock while we call ReadFrom.
c.mu.Lock()
conn := c.conn
closed := c.closed
if closed {
c.mu.Unlock()
return 0
}
if conn == nil {
// There is no current connection.
// Wait for the connection to change, then try again.
c.change.Wait()
c.mu.Unlock()
continue
}
c.mu.Unlock()
n, _, err := conn.ReadFrom(out)
if err == nil || packetWasTruncated(err) {
// Success.
return n
}
if errors.Is(err, net.ErrClosed) {
// We intentionally closed this connection.
// It has been replaced by a new connection. Try again.
continue
}
c.logf("read: unrecognized error: %v", err)
return 0
}
}
// reconnectLocked replaces the current connection with a new one.
// c.mu must be locked.
func (c *fwdConn) reconnectLocked() {
c.closeConnLocked()
// Make a new connection.
conn, err := net.ListenPacket("udp", "")
if err != nil {
c.logf("ListenPacket failed: %v", err)
} else {
c.conn = conn
}
// Broadcast that a new connection is available.
c.change.Broadcast()
}
// closeCurrentConn closes the current connection.
// c.mu must be locked.
func (c *fwdConn) closeConnLocked() {
if c.conn == nil {
return
}
c.conn.Close() // unblocks all readers/writers, they'll pick up the next connection.
c.conn = nil
}
// close permanently closes c.
func (c *fwdConn) close() {
c.mu.Lock()
defer c.mu.Unlock()
if c.closed {
return
}
c.closed = true
c.closeConnLocked()
// Unblock any remaining readers.
c.change.Broadcast()
}
// nameFromQuery extracts the normalized query name from bs.
func nameFromQuery(bs []byte) (dnsname.FQDN, error) {
var parser dns.Parser
hdr, err := parser.Start(bs)
if err != nil {
return "", err
}
if hdr.Response {
return "", errNotQuery
}
q, err := parser.Question()
if err != nil {
return "", err
}
n := q.Name.Data[:q.Name.Length]
return dnsname.ToFQDN(rawNameToLower(n))
}