564 lines
14 KiB
Go
564 lines
14 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package derphttp implements DERP-over-HTTP.
|
|
//
|
|
// This makes DERP look exactly like WebSockets.
|
|
// A server can implement DERP over HTTPS and even if the TLS connection
|
|
// intercepted using a fake root CA, unless the interceptor knows how to
|
|
// detect DERP packets, it will look like a web socket.
|
|
package derphttp
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"crypto/tls"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"sync"
|
|
"time"
|
|
|
|
"inet.af/netaddr"
|
|
"tailscale.com/derp"
|
|
"tailscale.com/net/dnscache"
|
|
"tailscale.com/net/netns"
|
|
"tailscale.com/net/tlsdial"
|
|
"tailscale.com/tailcfg"
|
|
"tailscale.com/types/key"
|
|
"tailscale.com/types/logger"
|
|
)
|
|
|
|
// Client is a DERP-over-HTTP client.
|
|
//
|
|
// It automatically reconnects on error retry. That is, a failed Send or
|
|
// Recv will report the error and not retry, but subsequent calls to
|
|
// Send/Recv will completely re-establish the connection (unless Close
|
|
// has been called).
|
|
type Client struct {
|
|
TLSConfig *tls.Config // optional; nil means default
|
|
DNSCache *dnscache.Resolver // optional; nil means no caching
|
|
|
|
privateKey key.Private
|
|
logf logger.Logf
|
|
|
|
// Either url or getRegion is non-nil:
|
|
url *url.URL
|
|
getRegion func() *tailcfg.DERPRegion
|
|
|
|
ctx context.Context // closed via cancelCtx in Client.Close
|
|
cancelCtx context.CancelFunc
|
|
|
|
mu sync.Mutex
|
|
preferred bool
|
|
closed bool
|
|
netConn io.Closer
|
|
client *derp.Client
|
|
}
|
|
|
|
// NewRegionClient returns a new DERP-over-HTTP client. It connects lazily.
|
|
// To trigger a connection, use Connect.
|
|
func NewRegionClient(privateKey key.Private, logf logger.Logf, getRegion func() *tailcfg.DERPRegion) *Client {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
c := &Client{
|
|
privateKey: privateKey,
|
|
logf: logf,
|
|
getRegion: getRegion,
|
|
ctx: ctx,
|
|
cancelCtx: cancel,
|
|
}
|
|
return c
|
|
}
|
|
|
|
// NewNetcheckClient returns a Client that's only able to have its DialRegion method called.
|
|
// It's used by the netcheck package.
|
|
func NewNetcheckClient(logf logger.Logf) *Client {
|
|
return &Client{logf: logf}
|
|
}
|
|
|
|
// NewClient returns a new DERP-over-HTTP client. It connects lazily.
|
|
// To trigger a connection, use Connect.
|
|
func NewClient(privateKey key.Private, serverURL string, logf logger.Logf) (*Client, error) {
|
|
u, err := url.Parse(serverURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("derphttp.NewClient: %v", err)
|
|
}
|
|
if urlPort(u) == "" {
|
|
return nil, fmt.Errorf("derphttp.NewClient: invalid URL scheme %q", u.Scheme)
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
c := &Client{
|
|
privateKey: privateKey,
|
|
logf: logf,
|
|
url: u,
|
|
ctx: ctx,
|
|
cancelCtx: cancel,
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
type dialer interface {
|
|
Dial(network, address string) (net.Conn, error)
|
|
DialContext(ctx context.Context, network, address string) (net.Conn, error)
|
|
}
|
|
|
|
// Connect connects or reconnects to the server, unless already connected.
|
|
// It returns nil if there was already a good connection, or if one was made.
|
|
func (c *Client) Connect(ctx context.Context) error {
|
|
_, err := c.connect(ctx, "derphttp.Client.Connect")
|
|
return err
|
|
}
|
|
|
|
func urlPort(u *url.URL) string {
|
|
if p := u.Port(); p != "" {
|
|
return p
|
|
}
|
|
switch u.Scheme {
|
|
case "https":
|
|
return "443"
|
|
case "http":
|
|
return "80"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (c *Client) targetString(reg *tailcfg.DERPRegion) string {
|
|
if c.url != nil {
|
|
return c.url.String()
|
|
}
|
|
return fmt.Sprintf("region %d (%v)", reg.RegionID, reg.RegionCode)
|
|
}
|
|
|
|
func (c *Client) useHTTPS() bool {
|
|
if c.url != nil && c.url.Scheme == "http" {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// tlsServerName returns which TLS cert name to expect for the given node.
|
|
func (c *Client) tlsServerName(node *tailcfg.DERPNode) string {
|
|
if c.url != nil {
|
|
return c.url.Host
|
|
}
|
|
if node.CertName != "" {
|
|
return node.CertName
|
|
}
|
|
return node.HostName
|
|
}
|
|
|
|
func (c *Client) urlString(node *tailcfg.DERPNode) string {
|
|
if c.url != nil {
|
|
return c.url.String()
|
|
}
|
|
return fmt.Sprintf("https://%s/derp", node.HostName)
|
|
}
|
|
|
|
func (c *Client) connect(ctx context.Context, caller string) (client *derp.Client, err error) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.closed {
|
|
return nil, ErrClientClosed
|
|
}
|
|
if c.client != nil {
|
|
return c.client, nil
|
|
}
|
|
|
|
// timeout is the fallback maximum time (if ctx doesn't limit
|
|
// it further) to do all of: DNS + TCP + TLS + HTTP Upgrade +
|
|
// DERP upgrade.
|
|
const timeout = 10 * time.Second
|
|
ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
go func() {
|
|
select {
|
|
case <-ctx.Done():
|
|
// Either timeout fired (handled below), or
|
|
// we're returning via the defer cancel()
|
|
// below.
|
|
case <-c.ctx.Done():
|
|
// Propagate a Client.Close call into
|
|
// cancelling this context.
|
|
cancel()
|
|
}
|
|
}()
|
|
defer cancel()
|
|
|
|
var reg *tailcfg.DERPRegion // nil when using c.url to dial
|
|
if c.getRegion != nil {
|
|
reg = c.getRegion()
|
|
if reg == nil {
|
|
return nil, errors.New("DERP region not available")
|
|
}
|
|
}
|
|
|
|
var tcpConn net.Conn
|
|
|
|
defer func() {
|
|
if err != nil {
|
|
if ctx.Err() != nil {
|
|
err = fmt.Errorf("%v: %v", ctx.Err(), err)
|
|
}
|
|
err = fmt.Errorf("%s connect to %v: %v", caller, c.targetString(reg), err)
|
|
if tcpConn != nil {
|
|
go tcpConn.Close()
|
|
}
|
|
}
|
|
}()
|
|
|
|
var node *tailcfg.DERPNode // nil when using c.url to dial
|
|
if c.url != nil {
|
|
c.logf("%s: connecting to %v", caller, c.url)
|
|
tcpConn, err = c.dialURL(ctx)
|
|
} else {
|
|
c.logf("%s: connecting to derp-%d (%v)", caller, reg.RegionID, reg.RegionCode)
|
|
tcpConn, node, err = c.dialRegion(ctx, reg)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Now that we have a TCP connection, force close it if the
|
|
// TLS handshake + DERP setup takes too long.
|
|
done := make(chan struct{})
|
|
defer close(done)
|
|
go func() {
|
|
select {
|
|
case <-done:
|
|
// Normal path. Upgrade occurred in time.
|
|
case <-ctx.Done():
|
|
select {
|
|
case <-done:
|
|
// Normal path. Upgrade occurred in time.
|
|
// But the ctx.Done() is also done because
|
|
// the "defer cancel()" above scheduled
|
|
// before this goroutine.
|
|
default:
|
|
// The TLS or HTTP or DERP exchanges didn't complete
|
|
// in time. Force close the TCP connection to force
|
|
// them to fail quickly.
|
|
tcpConn.Close()
|
|
}
|
|
}
|
|
}()
|
|
|
|
var httpConn net.Conn // a TCP conn or a TLS conn; what we speak HTTP to
|
|
if c.useHTTPS() {
|
|
httpConn = c.tlsClient(tcpConn, node)
|
|
} else {
|
|
httpConn = tcpConn
|
|
}
|
|
|
|
brw := bufio.NewReadWriter(bufio.NewReader(httpConn), bufio.NewWriter(httpConn))
|
|
|
|
req, err := http.NewRequest("GET", c.urlString(node), nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Upgrade", "DERP")
|
|
req.Header.Set("Connection", "Upgrade")
|
|
|
|
if err := req.Write(brw); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := brw.Flush(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp, err := http.ReadResponse(brw.Reader, req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if resp.StatusCode != http.StatusSwitchingProtocols {
|
|
b, _ := ioutil.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
return nil, fmt.Errorf("GET failed: %v: %s", err, b)
|
|
}
|
|
|
|
derpClient, err := derp.NewClient(c.privateKey, httpConn, brw, c.logf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if c.preferred {
|
|
if err := derpClient.NotePreferred(true); err != nil {
|
|
go httpConn.Close()
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
c.client = derpClient
|
|
c.netConn = tcpConn
|
|
return c.client, nil
|
|
}
|
|
|
|
func (c *Client) dialURL(ctx context.Context) (net.Conn, error) {
|
|
host := c.url.Hostname()
|
|
hostOrIP := host
|
|
|
|
var stdDialer dialer = netns.Dialer()
|
|
var dialer = stdDialer
|
|
if wrapDialer != nil {
|
|
dialer = wrapDialer(dialer)
|
|
}
|
|
|
|
if c.DNSCache != nil {
|
|
ip, err := c.DNSCache.LookupIP(ctx, host)
|
|
if err == nil {
|
|
hostOrIP = ip.String()
|
|
}
|
|
if err != nil && dialer == stdDialer {
|
|
// Return an error if we're not using a dial
|
|
// proxy that can do DNS lookups for us.
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
tcpConn, err := dialer.DialContext(ctx, "tcp", net.JoinHostPort(hostOrIP, urlPort(c.url)))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("dial of %v: %v", host, err)
|
|
}
|
|
return tcpConn, nil
|
|
}
|
|
|
|
// dialRegion returns a TCP connection to the provided region, trying
|
|
// each node in order (with dialNode) until one connects or ctx is
|
|
// done.
|
|
func (c *Client) dialRegion(ctx context.Context, reg *tailcfg.DERPRegion) (net.Conn, *tailcfg.DERPNode, error) {
|
|
if len(reg.Nodes) == 0 {
|
|
return nil, nil, fmt.Errorf("no nodes for %s", c.targetString(reg))
|
|
}
|
|
var firstErr error
|
|
for _, n := range reg.Nodes {
|
|
if n.STUNOnly {
|
|
continue
|
|
}
|
|
c, err := c.dialNode(ctx, n)
|
|
if err == nil {
|
|
return c, n, nil
|
|
}
|
|
if firstErr == nil {
|
|
firstErr = err
|
|
}
|
|
}
|
|
return nil, nil, firstErr
|
|
}
|
|
|
|
func (c *Client) tlsClient(nc net.Conn, node *tailcfg.DERPNode) *tls.Conn {
|
|
tlsConf := tlsdial.Config(c.tlsServerName(node), c.TLSConfig)
|
|
if node != nil && node.DERPTestPort != 0 {
|
|
tlsConf.InsecureSkipVerify = true
|
|
}
|
|
return tls.Client(nc, tlsConf)
|
|
}
|
|
|
|
func (c *Client) DialRegionTLS(ctx context.Context, reg *tailcfg.DERPRegion) (tlsConn *tls.Conn, connClose io.Closer, err error) {
|
|
tcpConn, node, err := c.dialRegion(ctx, reg)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
done := make(chan bool) // unbufferd
|
|
defer close(done)
|
|
|
|
tlsConn = c.tlsClient(tcpConn, node)
|
|
go func() {
|
|
select {
|
|
case <-done:
|
|
case <-ctx.Done():
|
|
tcpConn.Close()
|
|
}
|
|
}()
|
|
err = tlsConn.Handshake()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
select {
|
|
case done <- true:
|
|
return tlsConn, tcpConn, nil
|
|
case <-ctx.Done():
|
|
return nil, nil, ctx.Err()
|
|
}
|
|
}
|
|
|
|
func (c *Client) dialContext(ctx context.Context, proto, addr string) (net.Conn, error) {
|
|
var stdDialer dialer = netns.Dialer()
|
|
var dialer = stdDialer
|
|
if wrapDialer != nil {
|
|
dialer = wrapDialer(dialer)
|
|
}
|
|
return dialer.DialContext(ctx, proto, addr)
|
|
}
|
|
|
|
// shouldDialProto reports whether an explicitly provided IPv4 or IPv6
|
|
// address (given in s) is valid. An empty value means to dial, but to
|
|
// use DNS. The predicate function reports whether the non-empty
|
|
// string s contained a valid IP address of the right family.
|
|
func shouldDialProto(s string, pred func(netaddr.IP) bool) bool {
|
|
if s == "" {
|
|
return true
|
|
}
|
|
ip, _ := netaddr.ParseIP(s)
|
|
return pred(ip)
|
|
}
|
|
|
|
const dialNodeTimeout = 1500 * time.Millisecond
|
|
|
|
// dialNode returns a TCP connection to node n, racing IPv4 and IPv6
|
|
// (both as applicable) against each other.
|
|
// A node is only given dialNodeTimeout to connect.
|
|
//
|
|
// TODO(bradfitz): longer if no options remain perhaps? ... Or longer
|
|
// overall but have dialRegion start overlapping races?
|
|
func (c *Client) dialNode(ctx context.Context, n *tailcfg.DERPNode) (net.Conn, error) {
|
|
type res struct {
|
|
c net.Conn
|
|
err error
|
|
}
|
|
resc := make(chan res) // must be unbuffered
|
|
ctx, cancel := context.WithTimeout(ctx, dialNodeTimeout)
|
|
defer cancel()
|
|
|
|
nwait := 0
|
|
startDial := func(dstPrimary, proto string) {
|
|
nwait++
|
|
go func() {
|
|
dst := dstPrimary
|
|
if dst == "" {
|
|
dst = n.HostName
|
|
}
|
|
port := "443"
|
|
if n.DERPTestPort != 0 {
|
|
port = fmt.Sprint(n.DERPTestPort)
|
|
}
|
|
c, err := c.dialContext(ctx, proto, net.JoinHostPort(dst, port))
|
|
select {
|
|
case resc <- res{c, err}:
|
|
case <-ctx.Done():
|
|
if c != nil {
|
|
c.Close()
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
if shouldDialProto(n.IPv4, netaddr.IP.Is4) {
|
|
startDial(n.IPv4, "tcp4")
|
|
}
|
|
if shouldDialProto(n.IPv6, netaddr.IP.Is6) {
|
|
startDial(n.IPv6, "tcp6")
|
|
}
|
|
if nwait == 0 {
|
|
return nil, errors.New("both IPv4 and IPv6 are explicitly disabled for node")
|
|
}
|
|
|
|
var firstErr error
|
|
for {
|
|
select {
|
|
case res := <-resc:
|
|
nwait--
|
|
if res.err == nil {
|
|
return res.c, nil
|
|
}
|
|
if firstErr == nil {
|
|
firstErr = res.err
|
|
}
|
|
if nwait == 0 {
|
|
return nil, firstErr
|
|
}
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *Client) Send(dstKey key.Public, b []byte) error {
|
|
client, err := c.connect(context.TODO(), "derphttp.Client.Send")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := client.Send(dstKey, b); err != nil {
|
|
c.closeForReconnect(client)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// NotePreferred notes whether this Client is the caller's preferred
|
|
// (home) DERP node. It's only used for stats.
|
|
func (c *Client) NotePreferred(v bool) {
|
|
c.mu.Lock()
|
|
if c.preferred == v {
|
|
c.mu.Unlock()
|
|
return
|
|
}
|
|
c.preferred = v
|
|
client := c.client
|
|
c.mu.Unlock()
|
|
|
|
if client != nil {
|
|
if err := client.NotePreferred(v); err != nil {
|
|
c.closeForReconnect(client)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *Client) Recv(b []byte) (derp.ReceivedMessage, error) {
|
|
client, err := c.connect(context.TODO(), "derphttp.Client.Recv")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
m, err := client.Recv(b)
|
|
if err != nil {
|
|
c.closeForReconnect(client)
|
|
}
|
|
return m, err
|
|
}
|
|
|
|
// Close closes the client. It will not automatically reconnect after
|
|
// being closed.
|
|
func (c *Client) Close() error {
|
|
c.cancelCtx() // not in lock, so it can cancel Connect, which holds mu
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.closed {
|
|
return ErrClientClosed
|
|
}
|
|
c.closed = true
|
|
if c.netConn != nil {
|
|
c.netConn.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// closeForReconnect closes the underlying network connection and
|
|
// zeros out the client field so future calls to Connect will
|
|
// reconnect.
|
|
//
|
|
// The provided brokenClient is the client to forget. If current
|
|
// client is not brokenClient, closeForReconnect does nothing. (This
|
|
// prevents a send and receive goroutine from failing at the ~same
|
|
// time and both calling closeForReconnect and the caller goroutines
|
|
// forever calling closeForReconnect in lockstep endlessly;
|
|
// https://github.com/tailscale/tailscale/pull/264)
|
|
func (c *Client) closeForReconnect(brokenClient *derp.Client) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.client != brokenClient {
|
|
return
|
|
}
|
|
if c.netConn != nil {
|
|
c.netConn.Close()
|
|
c.netConn = nil
|
|
}
|
|
c.client = nil
|
|
}
|
|
|
|
var ErrClientClosed = errors.New("derphttp.Client closed")
|
|
|
|
// wrapDialer, if non-nil, specifies a function to wrap a dialer in a
|
|
// SOCKS-using dialer. It's set conditionally by socks.go.
|
|
var wrapDialer func(dialer) dialer
|