2023-01-27 21:37:20 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2020-02-18 19:07:44 +00:00
// The derper binary is a simple DERP server.
package main // import "tailscale.com/cmd/derper"
import (
2024-02-07 22:27:29 +00:00
"cmp"
2020-03-02 16:55:44 +00:00
"context"
2020-08-18 23:32:32 +01:00
"crypto/tls"
2020-02-18 19:07:44 +00:00
"encoding/json"
2020-03-02 16:55:44 +00:00
"errors"
2020-02-21 17:35:53 +00:00
"expvar"
2020-02-18 19:07:44 +00:00
"flag"
2021-12-06 08:28:19 +00:00
"fmt"
2020-02-18 19:07:44 +00:00
"io"
"log"
2022-02-11 20:02:38 +00:00
"math"
2020-02-27 03:11:14 +00:00
"net"
2020-02-18 19:07:44 +00:00
"net/http"
"os"
2024-01-05 19:14:42 +00:00
"os/signal"
2020-02-18 19:07:44 +00:00
"path/filepath"
2020-03-02 16:55:44 +00:00
"regexp"
2020-06-01 23:19:41 +01:00
"strings"
2024-01-05 19:14:42 +00:00
"syscall"
2020-02-27 03:11:14 +00:00
"time"
2020-02-18 19:07:44 +00:00
2022-09-07 21:31:06 +01:00
"go4.org/mem"
2022-02-11 20:02:38 +00:00
"golang.org/x/time/rate"
2020-02-18 19:07:44 +00:00
"tailscale.com/atomicfile"
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
2020-03-04 20:24:07 +00:00
"tailscale.com/metrics"
2024-01-05 19:14:42 +00:00
"tailscale.com/net/stunserver"
2020-02-25 16:07:41 +00:00
"tailscale.com/tsweb"
2020-02-18 19:07:44 +00:00
"tailscale.com/types/key"
)
var (
2023-04-05 01:10:50 +01:00
dev = flag . Bool ( "dev" , false , "run in localhost development mode (overrides -a)" )
addr = flag . String ( "a" , ":443" , "server HTTP/HTTPS listen address, in form \":port\", \"ip:port\", or for IPv6 \"[ip]:port\". If the IP is omitted, it defaults to all interfaces. Serves HTTPS if the port is 443 and/or -certmode is manual, otherwise HTTP." )
2022-08-01 22:36:55 +01:00
httpPort = flag . Int ( "http-port" , 80 , "The port on which to serve HTTP. Set to -1 to disable. The listener is bound to the same IP (if any) as specified in the -a flag." )
stunPort = flag . Int ( "stun-port" , 3478 , "The UDP port on which to serve STUN. The listener is bound to the same IP (if any) as specified in the -a flag." )
configPath = flag . String ( "c" , "" , "config file path" )
certMode = flag . String ( "certmode" , "letsencrypt" , "mode for getting a cert. possible options: manual, letsencrypt" )
certDir = flag . String ( "certdir" , tsweb . DefaultCertDir ( "derper-certs" ) , "directory to store LetsEncrypt certs, if addr's port is :443" )
hostname = flag . String ( "hostname" , "derp.tailscale.com" , "LetsEncrypt host name, if addr's port is :443" )
runSTUN = flag . Bool ( "stun" , true , "whether to run a STUN server. It will bind to the same IP (if any) as the --addr flag value." )
2022-09-04 03:41:31 +01:00
runDERP = flag . Bool ( "derp" , true , "whether to run a DERP server. The only reason to set this false is if you're decommissioning a server but want to keep its bootstrap DNS functionality still running." )
2022-02-01 01:35:53 +00:00
2022-09-02 19:48:30 +01:00
meshPSKFile = flag . String ( "mesh-psk-file" , defaultMeshPSKFile ( ) , "if non-empty, path to file containing the mesh pre-shared key file. It should contain some hex string; whitespace is trimmed." )
meshWith = flag . String ( "mesh-with" , "" , "optional comma-separated list of hostnames to mesh with; the server's own hostname can be in the list" )
bootstrapDNS = flag . String ( "bootstrap-dns-names" , "" , "optional comma-separated list of hostnames to make available at /bootstrap-dns" )
unpublishedDNS = flag . String ( "unpublished-bootstrap-dns-names" , "" , "optional comma-separated list of hostnames to make available at /bootstrap-dns and not publish in the list" )
verifyClients = flag . Bool ( "verify-clients" , false , "verify clients to this DERP server through a local tailscaled instance." )
2022-02-11 20:02:38 +00:00
acceptConnLimit = flag . Float64 ( "accept-connection-limit" , math . Inf ( + 1 ) , "rate limit for accepting new connection" )
acceptConnBurst = flag . Int ( "accept-connection-burst" , math . MaxInt , "burst limit for accepting new connection" )
2020-02-18 19:07:44 +00:00
)
2021-09-02 18:16:31 +01:00
var (
2022-01-21 22:07:40 +00:00
tlsRequestVersion = & metrics . LabelMap { Label : "version" }
tlsActiveVersion = & metrics . LabelMap { Label : "version" }
2021-09-02 18:16:31 +01:00
)
func init ( ) {
2022-01-25 18:43:47 +00:00
expvar . Publish ( "derper_tls_request_version" , tlsRequestVersion )
expvar . Publish ( "gauge_derper_tls_active_version" , tlsActiveVersion )
2021-09-02 18:16:31 +01:00
}
2020-02-18 19:07:44 +00:00
type config struct {
2021-10-28 01:29:43 +01:00
PrivateKey key . NodePrivate
2020-02-18 19:07:44 +00:00
}
func loadConfig ( ) config {
2020-02-21 17:35:53 +00:00
if * dev {
2021-10-28 01:29:43 +01:00
return config { PrivateKey : key . NewNode ( ) }
2020-02-21 17:35:53 +00:00
}
2020-02-18 19:07:44 +00:00
if * configPath == "" {
2021-07-14 23:20:38 +01:00
if os . Getuid ( ) == 0 {
* configPath = "/var/lib/derper/derper.key"
} else {
2021-07-15 01:29:06 +01:00
log . Fatalf ( "derper: -c <config path> not specified" )
2021-07-14 23:20:38 +01:00
}
log . Printf ( "no config path specified; using %s" , * configPath )
2020-02-18 19:07:44 +00:00
}
2022-09-15 13:06:59 +01:00
b , err := os . ReadFile ( * configPath )
2020-02-18 19:07:44 +00:00
switch {
2020-11-02 16:33:34 +00:00
case errors . Is ( err , os . ErrNotExist ) :
2020-02-18 19:07:44 +00:00
return writeNewConfig ( )
case err != nil :
log . Fatal ( err )
panic ( "unreachable" )
default :
var cfg config
if err := json . Unmarshal ( b , & cfg ) ; err != nil {
log . Fatalf ( "derper: config: %v" , err )
}
return cfg
}
}
2020-02-21 17:35:53 +00:00
func writeNewConfig ( ) config {
2021-10-28 01:29:43 +01:00
k := key . NewNode ( )
2020-02-18 19:07:44 +00:00
if err := os . MkdirAll ( filepath . Dir ( * configPath ) , 0777 ) ; err != nil {
log . Fatal ( err )
}
cfg := config {
2021-10-28 01:29:43 +01:00
PrivateKey : k ,
2020-02-18 19:07:44 +00:00
}
b , err := json . MarshalIndent ( cfg , "" , "\t" )
if err != nil {
log . Fatal ( err )
}
2021-01-12 03:16:14 +00:00
if err := atomicfile . WriteFile ( * configPath , b , 0600 ) ; err != nil {
2020-02-18 19:07:44 +00:00
log . Fatal ( err )
}
return cfg
}
func main ( ) {
flag . Parse ( )
2024-01-05 19:14:42 +00:00
ctx , cancel := signal . NotifyContext ( context . Background ( ) , syscall . SIGINT , syscall . SIGTERM )
defer cancel ( )
2020-02-21 17:35:53 +00:00
if * dev {
* addr = ":3340" // above the keys DERP
log . Printf ( "Running in dev mode." )
2020-03-03 19:33:22 +00:00
tsweb . DevMode = true
2020-02-21 17:35:53 +00:00
}
2021-09-02 18:42:27 +01:00
listenHost , _ , err := net . SplitHostPort ( * addr )
if err != nil {
log . Fatalf ( "invalid server address: %v" , err )
}
2024-01-05 19:14:42 +00:00
if * runSTUN {
ss := stunserver . New ( ctx )
go ss . ListenAndServe ( net . JoinHostPort ( listenHost , fmt . Sprint ( * stunPort ) ) )
}
2020-02-18 19:07:44 +00:00
cfg := loadConfig ( )
2021-10-31 02:12:09 +00:00
serveTLS := tsweb . IsProd443 ( * addr ) || * certMode == "manual"
2020-02-18 19:07:44 +00:00
2021-10-28 23:42:50 +01:00
s := derp . NewServer ( cfg . PrivateKey , log . Printf )
2021-06-24 21:31:05 +01:00
s . SetVerifyClient ( * verifyClients )
2020-06-01 23:19:41 +01:00
if * meshPSKFile != "" {
2022-09-15 13:06:59 +01:00
b , err := os . ReadFile ( * meshPSKFile )
2020-06-01 23:19:41 +01:00
if err != nil {
log . Fatal ( err )
}
key := strings . TrimSpace ( string ( b ) )
if matched , _ := regexp . MatchString ( ` (?i)^[0-9a-f] { 64,}$ ` , key ) ; ! matched {
log . Fatalf ( "key in %s must contain 64+ hex digits" , * meshPSKFile )
}
s . SetMeshKey ( key )
log . Printf ( "DERP mesh key configured" )
}
2020-06-03 22:42:20 +01:00
if err := startMesh ( s ) ; err != nil {
log . Fatalf ( "startMesh: %v" , err )
}
2020-02-21 17:35:53 +00:00
expvar . Publish ( "derp" , s . ExpVar ( ) )
2020-02-18 19:07:44 +00:00
2021-06-16 07:38:19 +01:00
mux := http . NewServeMux ( )
2022-09-04 03:41:31 +01:00
if * runDERP {
derpHandler := derphttp . Handler ( s )
derpHandler = addWebSocketSupport ( s , derpHandler )
mux . Handle ( "/derp" , derpHandler )
} else {
mux . Handle ( "/derp" , http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
http . Error ( w , "derp server disabled" , http . StatusNotFound )
} ) )
}
2021-10-27 17:37:32 +01:00
mux . HandleFunc ( "/derp/probe" , probeHandler )
2021-02-26 16:28:31 +00:00
go refreshBootstrapDNSLoop ( )
2023-07-11 19:53:46 +01:00
mux . HandleFunc ( "/bootstrap-dns" , tsweb . BrowserHeaderHandlerFunc ( handleBootstrapDNS ) )
2020-02-18 19:07:44 +00:00
mux . Handle ( "/" , http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
2023-07-11 19:53:46 +01:00
tsweb . AddBrowserHeaders ( w )
2020-02-21 17:35:53 +00:00
w . Header ( ) . Set ( "Content-Type" , "text/html; charset=utf-8" )
2020-02-18 19:07:44 +00:00
w . WriteHeader ( 200 )
2020-02-21 17:35:53 +00:00
io . WriteString ( w , ` < html > < body >
< h1 > DERP < / h1 >
< p >
This is a
< a href = "https://tailscale.com/" > Tailscale < / a >
2021-02-26 16:14:13 +00:00
< a href = "https://pkg.go.dev/tailscale.com/derp" > DERP < / a >
2020-02-21 17:35:53 +00:00
server .
< / p >
` )
2022-09-04 03:41:31 +01:00
if ! * runDERP {
io . WriteString ( w , ` <p>Status: <b>disabled</b></p> ` )
}
2020-02-25 16:07:41 +00:00
if tsweb . AllowDebugAccess ( r ) {
2020-02-21 17:35:53 +00:00
io . WriteString ( w , "<p>Debug info at <a href='/debug/'>/debug/</a>.</p>\n" )
}
2020-02-18 19:07:44 +00:00
} ) )
2022-09-09 18:21:39 +01:00
mux . Handle ( "/robots.txt" , http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
2023-07-11 19:53:46 +01:00
tsweb . AddBrowserHeaders ( w )
2022-09-09 18:21:39 +01:00
io . WriteString ( w , "User-agent: *\nDisallow: /\n" )
} ) )
2022-09-12 18:43:50 +01:00
mux . Handle ( "/generate_204" , http . HandlerFunc ( serveNoContent ) )
2021-06-16 07:38:19 +01:00
debug := tsweb . Debugger ( mux )
debug . KV ( "TLS hostname" , * hostname )
debug . KV ( "Mesh key" , s . HasMeshKey ( ) )
debug . Handle ( "check" , "Consistency check" , http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
err := s . ConsistencyCheck ( )
if err != nil {
http . Error ( w , err . Error ( ) , 500 )
} else {
io . WriteString ( w , "derp.Server ConsistencyCheck okay" )
}
} ) )
2021-06-18 05:34:01 +01:00
debug . Handle ( "traffic" , "Traffic check" , http . HandlerFunc ( s . ServeDebugTraffic ) )
2020-02-18 19:07:44 +00:00
2022-09-07 21:31:06 +01:00
quietLogger := log . New ( logFilter { } , "" , 0 )
2020-02-18 19:07:44 +00:00
httpsrv := & http . Server {
2022-09-07 21:31:06 +01:00
Addr : * addr ,
Handler : mux ,
ErrorLog : quietLogger ,
2021-08-31 18:18:36 +01:00
// Set read/write timeout. For derper, this basically
// only affects TLS setup, as read/write deadlines are
// cleared on Hijack, which the DERP server does. But
// without this, we slowly accumulate stuck TLS
// handshake goroutines forever. This also affects
// /debug/ traffic, but 30 seconds is plenty for
// Prometheus/etc scraping.
ReadTimeout : 30 * time . Second ,
WriteTimeout : 30 * time . Second ,
2020-02-18 19:07:44 +00:00
}
2024-01-05 19:14:42 +00:00
go func ( ) {
<- ctx . Done ( )
httpsrv . Shutdown ( ctx )
} ( )
2020-02-18 19:07:44 +00:00
2021-09-09 00:50:34 +01:00
if serveTLS {
2020-02-18 19:07:44 +00:00
log . Printf ( "derper: serving on %s with TLS" , * addr )
2021-09-09 01:00:52 +01:00
var certManager certProvider
certManager , err = certProviderByCertMode ( * certMode , * certDir , * hostname )
2021-09-09 00:50:34 +01:00
if err != nil {
log . Fatalf ( "derper: can not start cert provider: %v" , err )
2020-03-02 16:55:44 +00:00
}
2020-02-18 19:07:44 +00:00
httpsrv . TLSConfig = certManager . TLSConfig ( )
2021-09-09 00:50:34 +01:00
getCert := httpsrv . TLSConfig . GetCertificate
2020-08-18 23:32:32 +01:00
httpsrv . TLSConfig . GetCertificate = func ( hi * tls . ClientHelloInfo ) ( * tls . Certificate , error ) {
2021-09-09 00:50:34 +01:00
cert , err := getCert ( hi )
2020-08-18 23:32:32 +01:00
if err != nil {
return nil , err
}
cert . Certificate = append ( cert . Certificate , s . MetaCert ( ) )
return cert , nil
}
2022-01-28 00:51:30 +00:00
// Disable TLS 1.0 and 1.1, which are obsolete and have security issues.
httpsrv . TLSConfig . MinVersion = tls . VersionTLS12
2021-11-22 17:35:17 +00:00
httpsrv . Handler = http . HandlerFunc ( func ( w http . ResponseWriter , r * http . Request ) {
2022-01-21 22:07:40 +00:00
if r . TLS != nil {
label := "unknown"
switch r . TLS . Version {
case tls . VersionTLS10 :
label = "1.0"
case tls . VersionTLS11 :
label = "1.1"
case tls . VersionTLS12 :
label = "1.2"
case tls . VersionTLS13 :
label = "1.3"
}
tlsRequestVersion . Add ( label , 1 )
tlsActiveVersion . Add ( label , 1 )
defer tlsActiveVersion . Add ( label , - 1 )
}
2021-11-22 17:35:17 +00:00
mux . ServeHTTP ( w , r )
} )
2021-12-06 08:28:19 +00:00
if * httpPort > - 1 {
go func ( ) {
2022-09-12 18:43:50 +01:00
port80mux := http . NewServeMux ( )
port80mux . HandleFunc ( "/generate_204" , serveNoContent )
port80mux . Handle ( "/" , certManager . HTTPHandler ( tsweb . Port80Handler { Main : mux } ) )
2021-12-06 08:28:19 +00:00
port80srv := & http . Server {
Addr : net . JoinHostPort ( listenHost , fmt . Sprintf ( "%d" , * httpPort ) ) ,
2022-09-12 18:43:50 +01:00
Handler : port80mux ,
2022-09-07 21:31:06 +01:00
ErrorLog : quietLogger ,
2021-12-06 08:28:19 +00:00
ReadTimeout : 30 * time . Second ,
// Crank up WriteTimeout a bit more than usually
// necessary just so we can do long CPU profiles
// and not hit net/http/pprof's "profile
// duration exceeds server's WriteTimeout".
WriteTimeout : 5 * time . Minute ,
2020-02-18 19:07:44 +00:00
}
2021-12-06 08:28:19 +00:00
err := port80srv . ListenAndServe ( )
if err != nil {
if err != http . ErrServerClosed {
log . Fatal ( err )
}
}
} ( )
}
2022-02-11 20:02:38 +00:00
err = rateLimitedListenAndServeTLS ( httpsrv )
2020-02-18 19:07:44 +00:00
} else {
log . Printf ( "derper: serving on %s" , * addr )
err = httpsrv . ListenAndServe ( )
}
if err != nil && err != http . ErrServerClosed {
log . Fatalf ( "derper: %v" , err )
}
}
2020-02-21 17:35:53 +00:00
2022-10-14 17:42:09 +01:00
// Header names used by serveNoContent.
const (
	// noContentChallengeHeader is the request header carrying the challenge.
	noContentChallengeHeader = "X-Tailscale-Challenge"
	// noContentResponseHeader is the response header that echoes it back.
	noContentResponseHeader = "X-Tailscale-Response"
)
2022-09-12 18:43:50 +01:00
// For captive portal detection
func serveNoContent ( w http . ResponseWriter , r * http . Request ) {
2022-10-14 17:42:09 +01:00
if challenge := r . Header . Get ( noContentChallengeHeader ) ; challenge != "" {
badChar := strings . IndexFunc ( challenge , func ( r rune ) bool {
return ! isChallengeChar ( r )
} ) != - 1
if len ( challenge ) <= 64 && ! badChar {
w . Header ( ) . Set ( noContentResponseHeader , "response " + challenge )
}
}
2022-09-12 18:43:50 +01:00
w . WriteHeader ( http . StatusNoContent )
}
2022-10-14 17:42:09 +01:00
// isChallengeChar reports whether c may appear in a challenge value: ASCII
// letters, ASCII digits, '.', '-' and '_'. The set is a semi-randomly chosen
// limited set of valid characters.
func isChallengeChar(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	case c == '.', c == '-', c == '_':
		return true
	}
	return false
}
2021-10-27 17:37:32 +01:00
// probeHandler is the endpoint that js/wasm clients hit to measure
// DERP latency, since they can't do UDP STUN queries.
//
// GET and HEAD requests get an empty 200 response that allows any origin.
// Every other method is rejected with 405 Method Not Allowed; the response
// carries an Allow header listing the supported methods, which RFC 9110
// requires for a 405.
func probeHandler(w http.ResponseWriter, r *http.Request) {
	switch r.Method {
	case http.MethodHead, http.MethodGet:
		w.Header().Set("Access-Control-Allow-Origin", "*")
	default:
		w.Header().Set("Allow", "GET, HEAD")
		http.Error(w, "bogus probe method", http.StatusMethodNotAllowed)
	}
}
2021-09-02 18:42:27 +01:00
2020-06-01 23:19:41 +01:00
// validProdHostname matches production DERP hostnames: "derp", followed by
// any run of non-dot characters, followed by ".tailscale.com" and an
// optional trailing dot. The pattern is anchored at both ends and must not
// contain any padding whitespace, or no hostname would ever match.
var validProdHostname = regexp.MustCompile(`^derp([^.]*)\.tailscale\.com\.?$`)

// prodAutocertHostPolicy reports whether a certificate may be requested for
// host. It returns nil when host matches validProdHostname and an
// "invalid hostname" error otherwise. The context argument is unused.
func prodAutocertHostPolicy(_ context.Context, host string) error {
	if !validProdHostname.MatchString(host) {
		return errors.New("invalid hostname")
	}
	return nil
}
2020-06-01 23:19:41 +01:00
// defaultMeshPSKFile returns the default value of the -mesh-psk-file flag:
// the first existing file among /home/derp/keys/derp-mesh.key and
// $HOME/keys/derp-mesh.key, or "" if neither exists.
//
// The $HOME candidate is skipped when HOME is unset or empty; otherwise the
// joined path would be the relative "keys/derp-mesh.key" and would be probed
// against whatever the current working directory happens to be.
func defaultMeshPSKFile() string {
	candidates := []string{"/home/derp/keys/derp-mesh.key"}
	if home := os.Getenv("HOME"); home != "" {
		candidates = append(candidates, filepath.Join(home, "keys", "derp-mesh.key"))
	}
	for _, p := range candidates {
		if _, err := os.Stat(p); err == nil {
			return p
		}
	}
	return ""
}
2022-02-11 20:02:38 +00:00
func rateLimitedListenAndServeTLS ( srv * http . Server ) error {
2024-02-07 22:27:29 +00:00
ln , err := net . Listen ( "tcp" , cmp . Or ( srv . Addr , ":https" ) )
2022-02-11 20:02:38 +00:00
if err != nil {
return err
}
rln := newRateLimitedListener ( ln , rate . Limit ( * acceptConnLimit ) , * acceptConnBurst )
expvar . Publish ( "tls_listener" , rln . ExpVar ( ) )
defer rln . Close ( )
return srv . ServeTLS ( rln , "" , "" )
}
// rateLimitedListener wraps a net.Listener and applies a rate limiter to
// the connections it accepts, counting how many were accepted and rejected.
type rateLimitedListener struct {
	// The counters come first in the struct to ensure 64-bit alignment
	// on 32-bit architecture regardless of what other fields may exist
	// in this package.
	numAccepts expvar.Int // does not include number of rejects
	numRejects expvar.Int

	// Listener is the underlying listener that connections come from.
	net.Listener

	// lim decides whether each accepted connection is let through.
	lim *rate.Limiter
}
// newRateLimitedListener wraps ln in a rateLimitedListener whose limiter is
// created from the given rate limit and burst size.
func newRateLimitedListener(ln net.Listener, limit rate.Limit, burst int) *rateLimitedListener {
	rl := new(rateLimitedListener)
	rl.Listener = ln
	rl.lim = rate.NewLimiter(limit, burst)
	return rl
}
// ExpVar returns a metrics set exposing the listener's accepted and
// rejected connection counters.
func (l *rateLimitedListener) ExpVar() expvar.Var {
	set := &metrics.Set{}
	set.Set("counter_accepted_connections", &l.numAccepts)
	set.Set("counter_rejected_connections", &l.numRejects)
	return set
}
// errLimitedConn describes a connection that was refused because of rate
// limiting. Accept does not return it (see there for why); it is retained
// for any other code in the package that refers to it.
var errLimitedConn = errors.New("cannot accept connection; rate limited")

// Accept returns the next connection that the rate limiter allows.
//
// Connections that exceed the limit are closed and counted as rejects, and
// Accept then moves on to the next one instead of returning an error.
// Returning a non-temporary error here would make http.Server.Serve return
// and thereby stop the whole server on the first rejected connection.
// Errors from the underlying listener are returned unchanged.
func (l *rateLimitedListener) Accept() (net.Conn, error) {
	for {
		// Even under a rate limited situation, we accept the connection
		// immediately and close it, rather than being slow at accepting new
		// connections. This provides two benefits: 1) it signals to the
		// client that something is going on on the server, and 2) it
		// prevents new connections from piling up and occupying resources
		// in the OS kernel.
		// The client will retry as needed (with backoffs in place).
		cn, err := l.Listener.Accept()
		if err != nil {
			return nil, err
		}
		if l.lim.Allow() {
			l.numAccepts.Add(1)
			return cn, nil
		}
		l.numRejects.Add(1)
		cn.Close()
	}
}
2022-09-07 21:31:06 +01:00
// logFilter is an io.Writer meant to back the net/http.Server.ErrorLog
// logger. It drops error lines that carry no useful information and passes
// everything else on to the standard logger.
type logFilter struct{}

// Write discards p when it ends in one of the known noisy suffixes and logs
// it via log.Printf otherwise. It always reports the whole of p as written
// and never returns an error.
func (logFilter) Write(p []byte) (int, error) {
	line := mem.B(p)
	for _, noise := range [...]string{
		": EOF\n",
		": i/o timeout\n",
		": read: connection reset by peer\n",
		": remote error: tls: bad certificate\n",
		": tls: first record does not look like a TLS handshake\n",
	} {
		if mem.HasSuffix(line, mem.S(noise)) {
			// Skip this log message, but say that we processed it.
			return len(p), nil
		}
	}
	log.Printf("%s", p)
	return len(p), nil
}