2020-02-05 22:16:58 +00:00
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package wgengine
import (
"bufio"
2020-04-08 16:42:38 +01:00
"bytes"
2020-02-25 16:06:29 +00:00
"context"
2020-05-17 17:51:38 +01:00
"errors"
2020-02-05 22:16:58 +00:00
"fmt"
2020-04-08 16:42:38 +01:00
"io"
2020-02-05 22:16:58 +00:00
"log"
2020-04-10 21:44:08 +01:00
"os"
"os/exec"
"runtime"
2020-02-05 22:16:58 +00:00
"strings"
"sync"
2020-06-09 19:00:48 +01:00
"sync/atomic"
2020-02-05 22:16:58 +00:00
"time"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"github.com/tailscale/wireguard-go/wgcfg"
2020-04-08 16:42:38 +01:00
"go4.org/mem"
2020-07-07 20:25:32 +01:00
"inet.af/netaddr"
2020-06-25 19:04:52 +01:00
"tailscale.com/control/controlclient"
2020-06-28 18:58:21 +01:00
"tailscale.com/internal/deepprint"
2020-03-26 05:57:46 +00:00
"tailscale.com/ipn/ipnstate"
2020-03-13 03:10:11 +00:00
"tailscale.com/net/interfaces"
2020-02-05 22:16:58 +00:00
"tailscale.com/tailcfg"
2020-03-26 05:57:46 +00:00
"tailscale.com/types/key"
2020-02-15 03:23:16 +00:00
"tailscale.com/types/logger"
2020-02-05 22:16:58 +00:00
"tailscale.com/wgengine/filter"
"tailscale.com/wgengine/magicsock"
2020-02-17 17:00:38 +00:00
"tailscale.com/wgengine/monitor"
2020-02-05 22:16:58 +00:00
"tailscale.com/wgengine/packet"
2020-04-30 21:20:09 +01:00
"tailscale.com/wgengine/router"
2020-06-08 23:19:26 +01:00
"tailscale.com/wgengine/tsdns"
2020-05-13 14:16:17 +01:00
"tailscale.com/wgengine/tstun"
2020-02-05 22:16:58 +00:00
)
2020-04-15 00:36:17 +01:00
// minimalMTU is the MTU we set on tailscale's tuntap
// interface. wireguard-go defaults to 1420 bytes, which only works if
// the "outer" MTU is 1500 bytes. This breaks on DSL connections
// (typically 1492 MTU) and on GCE (1460 MTU?!).
//
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU
// discovery.
//
// NewUserspaceEngine passes this value to tun.CreateTUN.
const minimalMTU = 1280
2020-07-07 20:25:32 +01:00
// Destination that identifies MagicDNS traffic. handleDNS intercepts
// outbound UDP packets addressed to magicDNSIP:magicDNSPort, and
// pollResolver uses the same pair as the source of the replies it injects.
const (
	// magicDNSIP is the IPv4 address in its numeric form.
	magicDNSIP = 0x64646464 // 100.100.100.100
	// magicDNSPort is the UDP port matched alongside magicDNSIP.
	magicDNSPort = 53
)
2020-07-14 14:12:00 +01:00
// magicDNSDomain is the parent domain for Tailscale nodes.
// It is handed to tsdns.NewResolver when the engine is created, and
// Reconfig prepends it to the router's search domains when the only
// configured nameserver is magicDNSIP.
const magicDNSDomain = "b.tailscale.net"
2020-02-05 22:16:58 +00:00
type userspaceEngine struct {
2020-06-09 19:00:48 +01:00
logf logger . Logf
reqCh chan struct { }
waitCh chan struct { } // chan is closed when first Close call completes; contrast with closing bool
tundev * tstun . TUN
wgdev * device . Device
router router . Router
resolver * tsdns . Resolver
useTailscaleDNS bool
magicConn * magicsock . Conn
linkMon * monitor . Mon
// localAddrs is the set of IP addresses assigned to the local
// tunnel interface. It's used to reflect local packets
// incorrectly sent to us.
localAddrs atomic . Value // of map[packet.IP]bool
2020-02-05 22:16:58 +00:00
2020-05-31 07:37:58 +01:00
wgLock sync . Mutex // serializes all wgdev operations; see lock order comment below
lastEngineSig string
lastRouterSig string
lastCfg wgcfg . Config
2020-02-05 22:16:58 +00:00
2020-03-25 15:40:36 +00:00
mu sync . Mutex // guards following; see lock order comment below
2020-05-17 17:51:38 +01:00
closing bool // Close was called (even if we're still closing)
2020-02-28 17:32:06 +00:00
statusCallback StatusCallback
peerSequence [ ] wgcfg . Key
endpoints [ ] string
2020-07-16 05:08:25 +01:00
pingers map [ wgcfg . Key ] * pinger // legacy pingers for pre-discovery peers
2020-03-13 03:10:11 +00:00
linkState * interfaces . State
2020-03-25 15:40:36 +00:00
// Lock ordering: wgLock, then mu.
2020-02-05 22:16:58 +00:00
}
2020-06-08 23:19:26 +01:00
// RouterGen is the signature for a function that creates a
// router.Router.
//
// newUserspaceEngineAdvanced invokes it with the engine's log function,
// the wireguard device it just created, and the unwrapped TUN device.
type RouterGen func ( logf logger . Logf , wgdev * device . Device , tundev tun . Device ) ( router . Router , error )
type EngineConfig struct {
// Logf is the logging function used by the engine.
Logf logger . Logf
// TUN is the tun device used by the engine.
TUN tun . Device
// RouterGen is the function used to instantiate the router.
RouterGen RouterGen
// ListenPort is the port on which the engine will listen.
ListenPort uint16
// EchoRespondToAll determines whether ICMP Echo requests incoming from Tailscale peers
// will be intercepted and responded to, regardless of the source host.
EchoRespondToAll bool
2020-07-07 20:25:32 +01:00
// UseTailscaleDNS determines whether DNS requests for names of the form <mynode>.<mydomain>.<root>
2020-06-08 23:19:26 +01:00
// directed to the designated Taislcale DNS address (see wgengine/tsdns)
// will be intercepted and resolved by a tsdns.Resolver.
UseTailscaleDNS bool
}
2020-02-05 22:16:58 +00:00
// Loggify adapts a logger.Logf to the io.Writer interface so that it can
// back a standard library *log.Logger.
type Loggify struct {
	f logger.Logf
}

// Write forwards b to the wrapped log function as a single message and
// reports the whole slice as written. It never returns an error.
//
// The bytes are passed as an argument to a constant "%s" format rather
// than being used as the format string themselves, so a '%' inside an
// already-formatted log line is emitted verbatim instead of being
// misinterpreted as a formatting verb.
func (l *Loggify) Write(b []byte) (int, error) {
	l.f("%s", b)
	return len(b), nil
}
2020-02-14 23:03:25 +00:00
func NewFakeUserspaceEngine ( logf logger . Logf , listenPort uint16 ) ( Engine , error ) {
2020-02-05 22:16:58 +00:00
logf ( "Starting userspace wireguard engine (FAKE tuntap device)." )
2020-06-08 23:19:26 +01:00
conf := EngineConfig {
Logf : logf ,
TUN : tstun . NewFakeTUN ( ) ,
RouterGen : router . NewFake ,
ListenPort : listenPort ,
EchoRespondToAll : true ,
}
return NewUserspaceEngineAdvanced ( conf )
2020-02-05 22:16:58 +00:00
}
2020-04-30 21:20:09 +01:00
// NewUserspaceEngine creates the named tun device and returns a
// Tailscale Engine running on it.
2020-02-14 23:03:25 +00:00
func NewUserspaceEngine ( logf logger . Logf , tunname string , listenPort uint16 ) ( Engine , error ) {
2020-02-05 22:16:58 +00:00
if tunname == "" {
2020-02-11 08:01:58 +00:00
return nil , fmt . Errorf ( "--tun name must not be blank" )
2020-02-05 22:16:58 +00:00
}
2020-04-10 21:44:08 +01:00
logf ( "Starting userspace wireguard engine with tun device %q" , tunname )
2020-05-13 14:16:17 +01:00
tun , err := tun . CreateTUN ( tunname , minimalMTU )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-04-10 21:44:08 +01:00
diagnoseTUNFailure ( logf )
2020-04-11 16:35:34 +01:00
logf ( "CreateTUN: %v" , err )
2020-02-05 22:16:58 +00:00
return nil , err
}
2020-04-11 16:35:34 +01:00
logf ( "CreateTUN ok." )
2020-02-05 22:16:58 +00:00
2020-06-08 23:19:26 +01:00
conf := EngineConfig {
Logf : logf ,
TUN : tun ,
RouterGen : router . New ,
ListenPort : listenPort ,
// TODO(dmytro): plumb this down.
UseTailscaleDNS : true ,
}
e , err := NewUserspaceEngineAdvanced ( conf )
2020-02-05 22:16:58 +00:00
if err != nil {
return nil , err
}
return e , err
}
2020-06-08 23:19:26 +01:00
// NewUserspaceEngineAdvanced is like NewUserspaceEngine
// but provides control over all config fields.
func NewUserspaceEngineAdvanced ( conf EngineConfig ) ( Engine , error ) {
return newUserspaceEngineAdvanced ( conf )
2020-02-14 23:03:25 +00:00
}
2020-02-05 22:16:58 +00:00
2020-06-08 23:19:26 +01:00
func newUserspaceEngineAdvanced ( conf EngineConfig ) ( _ Engine , reterr error ) {
logf := conf . Logf
2020-02-05 22:16:58 +00:00
e := & userspaceEngine {
2020-06-09 19:00:48 +01:00
logf : logf ,
reqCh : make ( chan struct { } , 1 ) ,
waitCh : make ( chan struct { } ) ,
tundev : tstun . WrapTUN ( logf , conf . TUN ) ,
2020-07-14 14:12:00 +01:00
resolver : tsdns . NewResolver ( logf , magicDNSDomain ) ,
2020-06-09 19:00:48 +01:00
useTailscaleDNS : conf . UseTailscaleDNS ,
pingers : make ( map [ wgcfg . Key ] * pinger ) ,
2020-02-05 22:16:58 +00:00
}
2020-06-09 19:00:48 +01:00
e . localAddrs . Store ( map [ packet . IP ] bool { } )
2020-03-13 03:10:11 +00:00
e . linkState , _ = getLinkState ( )
2020-02-05 22:16:58 +00:00
2020-06-08 23:19:26 +01:00
// Respond to all pings only in fake mode.
if conf . EchoRespondToAll {
e . tundev . PostFilterIn = echoRespondToAll
}
2020-06-09 19:00:48 +01:00
e . tundev . PreFilterOut = e . handleLocalPackets
2020-06-08 23:19:26 +01:00
2020-02-17 17:00:38 +00:00
mon , err := monitor . New ( logf , func ( ) { e . LinkChange ( false ) } )
if err != nil {
2020-06-08 23:19:26 +01:00
e . tundev . Close ( )
2020-02-17 17:00:38 +00:00
return nil , err
}
e . linkMon = mon
2020-02-05 22:16:58 +00:00
endpointsFn := func ( endpoints [ ] string ) {
e . mu . Lock ( )
2020-02-18 16:57:11 +00:00
e . endpoints = append ( e . endpoints [ : 0 ] , endpoints ... )
2020-02-05 22:16:58 +00:00
e . mu . Unlock ( )
e . RequestStatus ( )
}
magicsockOpts := magicsock . Options {
Add tstest.PanicOnLog(), and fix various problems detected by this.
If a test calls log.Printf, 'go test' horrifyingly rearranges the
output to no longer be in chronological order, which makes debugging
virtually impossible. Let's stop that from happening by making
log.Printf panic if called from any module, no matter how deep, during
tests.
This required us to change the default error handler in at least one
http.Server, as well as plumbing a bunch of logf functions around,
especially in magicsock and wgengine, but also in logtail and backoff.
To add insult to injury, 'go test' also rearranges the output when a
parent test has multiple sub-tests (all the sub-test's t.Logf is always
printed after all the parent tests t.Logf), so we need to screw around
with a special Logf that can point at the "current" t (current_t.Logf)
in some places. Probably our entire way of using subtests is wrong,
since 'go test' would probably like to run them all in parallel if you
called t.Parallel(), but it definitely can't because the're all
manipulating the shared state created by the parent test. They should
probably all be separate toplevel tests instead, with common
setup/teardown logic. But that's a job for another time.
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
2020-05-14 03:59:54 +01:00
Logf : logf ,
2020-06-08 23:19:26 +01:00
Port : conf . ListenPort ,
2020-02-05 22:16:58 +00:00
EndpointsFunc : endpointsFn ,
2020-06-25 22:19:12 +01:00
IdleFunc : e . tundev . IdleDuration ,
2020-02-05 22:16:58 +00:00
}
2020-05-17 17:51:38 +01:00
e . magicConn , err = magicsock . NewConn ( magicsockOpts )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-06-08 23:19:26 +01:00
e . tundev . Close ( )
2020-02-05 22:16:58 +00:00
return nil , fmt . Errorf ( "wgengine: %v" , err )
}
// flags==0 because logf is already nested in another logger.
// The outer one can display the preferred log prefixes, etc.
dlog := log . New ( & Loggify { logf } , "" , 0 )
logger := device . Logger {
Debug : dlog ,
Info : dlog ,
Error : dlog ,
}
opts := & device . DeviceOptions {
2020-05-13 14:16:17 +01:00
Logger : & logger ,
2020-07-16 04:30:45 +01:00
HandshakeDone : func ( peerKey wgcfg . Key , peer * device . Peer , deviceAllowedIPs * device . AllowedIPs ) {
2020-02-05 22:16:58 +00:00
// Send an unsolicited status event every time a
// handshake completes. This makes sure our UI can
// update quickly as soon as it connects to a peer.
//
// We use a goroutine here to avoid deadlocking
// wireguard, since RequestStatus() will call back
// into it, and wireguard is what called us to get
// here.
go e . RequestStatus ( )
2020-02-25 16:06:29 +00:00
2020-07-16 05:08:25 +01:00
if e . magicConn . PeerHasDiscoKey ( tailcfg . NodeKey ( peerKey ) ) {
e . logf ( "wireguard handshake complete for %v" , peerKey . ShortString ( ) )
// This is a modern peer with discovery support. No need to send pings.
return
}
e . logf ( "wireguard handshake complete for %v; sending legacy pings" , peerKey . ShortString ( ) )
2020-03-08 11:08:38 +00:00
// Ping every single-IP that peer routes.
// These synthetic packets are used to traverse NATs.
var ips [ ] wgcfg . IP
2020-07-16 04:30:45 +01:00
allowedIPs := deviceAllowedIPs . EntriesForPeer ( peer )
2020-03-08 11:08:38 +00:00
for _ , ipNet := range allowedIPs {
if ones , bits := ipNet . Mask . Size ( ) ; ones == bits && ones != 0 {
2020-02-25 16:06:29 +00:00
var ip wgcfg . IP
2020-03-08 11:08:38 +00:00
copy ( ip . Addr [ : ] , ipNet . IP . To16 ( ) )
ips = append ( ips , ip )
2020-02-25 16:06:29 +00:00
}
}
2020-03-08 11:08:38 +00:00
if len ( ips ) > 0 {
go e . pinger ( peerKey , ips )
} else {
logf ( "[unexpected] peer %s has no single-IP routes: %v" , peerKey . ShortString ( ) , allowedIPs )
}
2020-02-05 22:16:58 +00:00
} ,
2020-03-03 15:39:02 +00:00
CreateBind : e . magicConn . CreateBind ,
2020-02-05 22:16:58 +00:00
CreateEndpoint : e . magicConn . CreateEndpoint ,
SkipBindUpdate : true ,
}
2020-05-10 23:34:16 +01:00
// wgdev takes ownership of tundev, will close it when closed.
2020-02-14 23:03:25 +00:00
e . wgdev = device . NewDevice ( e . tundev , opts )
defer func ( ) {
if reterr != nil {
e . wgdev . Close ( )
}
} ( )
2020-05-15 08:06:30 +01:00
// Pass the underlying tun.(*NativeDevice) to the router:
// routers do not Read or Write, but do access native interfaces.
2020-06-08 23:19:26 +01:00
e . router , err = conf . RouterGen ( logf , e . wgdev , e . tundev . Unwrap ( ) )
2020-02-14 23:03:25 +00:00
if err != nil {
2020-05-17 17:51:38 +01:00
e . magicConn . Close ( )
2020-02-14 23:03:25 +00:00
return nil , err
}
2020-02-05 22:16:58 +00:00
go func ( ) {
up := false
2020-02-14 23:03:25 +00:00
for event := range e . tundev . Events ( ) {
2020-02-05 22:16:58 +00:00
if event & tun . EventMTUUpdate != 0 {
2020-02-14 23:03:25 +00:00
mtu , err := e . tundev . MTU ( )
2020-02-05 22:16:58 +00:00
e . logf ( "external route MTU: %d (%v)" , mtu , err )
}
if event & tun . EventUp != 0 && ! up {
e . logf ( "external route: up" )
e . RequestStatus ( )
up = true
}
if event & tun . EventDown != 0 && up {
e . logf ( "external route: down" )
e . RequestStatus ( )
up = false
}
}
} ( )
e . wgdev . Up ( )
if err := e . router . Up ( ) ; err != nil {
2020-05-17 17:51:38 +01:00
e . magicConn . Close ( )
2020-02-05 22:16:58 +00:00
e . wgdev . Close ( )
return nil , err
}
2020-05-11 21:16:52 +01:00
// TODO(danderson): we should delete this. It's pointless to apply
// a no-op settings here.
2020-05-12 08:08:52 +01:00
if err := e . router . Set ( nil ) ; err != nil {
2020-05-17 17:51:38 +01:00
e . magicConn . Close ( )
2020-02-05 22:16:58 +00:00
e . wgdev . Close ( )
return nil , err
}
2020-02-17 17:00:38 +00:00
e . linkMon . Start ( )
2020-05-17 17:51:38 +01:00
e . magicConn . Start ( )
2020-02-05 22:16:58 +00:00
2020-07-07 20:25:32 +01:00
e . resolver . Start ( )
go e . pollResolver ( )
2020-02-05 22:16:58 +00:00
return e , nil
}
2020-06-08 23:19:26 +01:00
// echoRespondToAll is an inbound post-filter that answers every ICMP echo
// request on its own: the reply is injected outbound through t and the
// request is dropped. All other packets are accepted unchanged.
func echoRespondToAll(p *packet.ParsedPacket, t *tstun.TUN) filter.Response {
	if !p.IsEchoRequest() {
		return filter.Accept
	}

	hdr := p.ICMPHeader()
	hdr.ToResponse()
	t.InjectOutbound(packet.Generate(&hdr, p.Payload()))

	// The request has been answered here; nothing further should see it.
	return filter.Drop
}
2020-06-09 19:00:48 +01:00
// handleLocalPackets examines packets originating from the local network
// stack and intercepts the ones tailscaled must handle itself. Everything
// else is accepted so it can continue into the main ACL filter.
func (e *userspaceEngine) handleLocalPackets(p *packet.ParsedPacket, t *tstun.TUN) filter.Response {
	if e.useTailscaleDNS && e.handleDNS(p, t) == filter.Drop {
		// local DNS handled the packet.
		return filter.Drop
	}

	if runtime.GOOS != "darwin" || !e.isLocalAddr(p.DstIP) {
		return filter.Accept
	}

	// macOS NetworkExtension directs packets destined to the
	// tunnel's local IP address into the tunnel, instead of
	// looping back within the kernel network stack. We have to
	// notice that an outbound packet is actually destined for
	// ourselves, and loop it back into macOS.
	t.InjectInboundCopy(p.Buffer())
	return filter.Drop
}
// isLocalAddr reports whether ip is in the current set of local tunnel
// addresses stored in e.localAddrs. If the stored value is not the
// expected map type, it logs the anomaly and reports false.
func (e *userspaceEngine) isLocalAddr(ip packet.IP) bool {
	if addrs, ok := e.localAddrs.Load().(map[packet.IP]bool); ok {
		return addrs[ip]
	}
	e.logf("[unexpected] e.localAddrs was nil, can't check for loopback packet")
	return false
}
2020-06-08 23:19:26 +01:00
// handleDNS is an outbound pre-filter resolving Tailscale domains.
func ( e * userspaceEngine ) handleDNS ( p * packet . ParsedPacket , t * tstun . TUN ) filter . Response {
2020-07-07 20:25:32 +01:00
if p . DstIP == magicDNSIP && p . DstPort == magicDNSPort && p . IPProto == packet . UDP {
request := tsdns . Packet {
Payload : p . Payload ( ) ,
Addr : netaddr . IPPort { IP : p . SrcIP . Netaddr ( ) , Port : p . SrcPort } ,
}
err := e . resolver . EnqueueRequest ( request )
2020-06-08 23:19:26 +01:00
if err != nil {
2020-07-07 20:25:32 +01:00
e . logf ( "tsdns: enqueue: %v" , err )
2020-06-08 23:19:26 +01:00
}
return filter . Drop
}
return filter . Accept
}
2020-07-07 20:25:32 +01:00
// pollResolver loops over the DNS resolver's responses, wraps each one in a
// UDP packet sourced from magicDNSIP:magicDNSPort, and injects it inbound
// on the TUN device. It returns when the resolver reports tsdns.ErrClosed;
// any other error is logged and skipped.
func (e *userspaceEngine) pollResolver() {
	// TODO(dmytro): avoid this allocation without importing tstun quirks into tsdns.
	const offset = tstun.PacketStartOffset

	for {
		resp, err := e.resolver.NextResponse()
		switch {
		case err == tsdns.ErrClosed:
			return
		case err != nil:
			e.logf("tsdns: error: %v", err)
			continue
		}

		ipHdr := packet.IPHeader{
			SrcIP: packet.IP(magicDNSIP),
			DstIP: packet.IPFromNetaddr(resp.Addr.IP),
		}
		udpHdr := packet.UDPHeader{
			IPHeader: ipHdr,
			SrcPort:  magicDNSPort,
			DstPort:  resp.Addr.Port,
		}

		// Layout: [offset bytes of headroom][UDP/IP header][payload].
		payloadStart := offset + udpHdr.Len()
		pkt := make([]byte, payloadStart+len(resp.Payload))
		copy(pkt[payloadStart:], resp.Payload)
		udpHdr.Marshal(pkt[offset:])
		e.tundev.InjectInboundDirect(pkt, offset)
	}
}
2020-02-28 11:30:46 +00:00
// pinger sends ping packets for a few seconds.
2020-02-25 16:06:29 +00:00
//
// These generated packets are used to ensure we trigger the spray logic in
// the magicsock package for NAT traversal.
2020-07-16 05:08:25 +01:00
//
// These are only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
2020-05-29 06:38:26 +01:00
type pinger struct {
e * userspaceEngine
done chan struct { } // closed after shutdown (not the ctx.Done() chan)
cancel context . CancelFunc
}
2020-02-25 16:06:29 +00:00
2020-05-29 06:38:26 +01:00
// close stops the pinger: it cancels the run context and then blocks until
// done is closed. run's deferred cleanup removes the pinger from
// userspaceEngine.pingers before closing done.
//
// It must not be called while p.e.mu is held, because that cleanup
// acquires p.e.mu.
func (p *pinger) close() {
	p.cancel()
	<-p.done
}
2020-02-25 16:06:29 +00:00
2020-05-29 06:38:26 +01:00
func ( p * pinger ) run ( ctx context . Context , peerKey wgcfg . Key , ips [ ] wgcfg . IP , srcIP packet . IP ) {
defer func ( ) {
p . e . mu . Lock ( )
if p . e . pingers [ peerKey ] == p {
delete ( p . e . pingers , peerKey )
}
p . e . mu . Unlock ( )
2020-02-25 16:06:29 +00:00
2020-05-29 06:38:26 +01:00
close ( p . done )
} ( )
2020-02-25 16:06:29 +00:00
2020-06-04 23:42:44 +01:00
header := packet . ICMPHeader {
IPHeader : packet . IPHeader {
SrcIP : srcIP ,
} ,
Type : packet . ICMPEchoRequest ,
Code : packet . ICMPNoCode ,
}
2020-02-25 16:06:29 +00:00
// sendFreq is slightly longer than sprayFreq in magicsock to ensure
// that if these ping packets are the only source of early packets
// sent to the peer, that each one will be sprayed.
const sendFreq = 300 * time . Millisecond
const stopAfter = 3 * time . Second
start := time . Now ( )
2020-03-08 11:08:38 +00:00
var dstIPs [ ] packet . IP
for _ , ip := range ips {
dstIPs = append ( dstIPs , packet . NewIP ( ip . IP ( ) ) )
}
2020-02-25 16:06:29 +00:00
payload := [ ] byte ( "magicsock_spray" ) // no meaning
2020-06-04 23:42:44 +01:00
header . IPID = 1
2020-02-28 11:30:46 +00:00
t := time . NewTicker ( sendFreq )
defer t . Stop ( )
for {
select {
case <- ctx . Done ( ) :
return
case <- t . C :
}
if time . Since ( start ) > stopAfter {
return
}
2020-03-08 11:08:38 +00:00
for _ , dstIP := range dstIPs {
2020-06-04 23:42:44 +01:00
header . DstIP = dstIP
// InjectOutbound take ownership of the packet, so we allocate.
b := packet . Generate ( & header , payload )
2020-05-29 06:38:26 +01:00
p . e . tundev . InjectOutbound ( b )
2020-03-08 11:08:38 +00:00
}
2020-06-04 23:42:44 +01:00
header . IPID ++
2020-02-28 11:30:46 +00:00
}
2020-05-29 06:38:26 +01:00
}
// pinger sends ping packets for a few seconds.
//
// These generated packets are used to ensure we trigger the spray logic in
// the magicsock package for NAT traversal.
2020-07-16 05:08:25 +01:00
//
// This is only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
2020-05-29 06:38:26 +01:00
func ( e * userspaceEngine ) pinger ( peerKey wgcfg . Key , ips [ ] wgcfg . IP ) {
e . logf ( "generating initial ping traffic to %s (%v)" , peerKey . ShortString ( ) , ips )
var srcIP packet . IP
e . wgLock . Lock ( )
if len ( e . lastCfg . Addresses ) > 0 {
srcIP = packet . NewIP ( e . lastCfg . Addresses [ 0 ] . IP . IP ( ) )
}
e . wgLock . Unlock ( )
if srcIP == 0 {
e . logf ( "generating initial ping traffic: no source IP" )
return
}
ctx , cancel := context . WithCancel ( context . Background ( ) )
p := & pinger {
e : e ,
done : make ( chan struct { } ) ,
cancel : cancel ,
}
e . mu . Lock ( )
if e . closing {
e . mu . Unlock ( )
return
}
oldPinger := e . pingers [ peerKey ]
e . pingers [ peerKey ] = p
e . mu . Unlock ( )
if oldPinger != nil {
oldPinger . close ( )
}
p . run ( ctx , peerKey , ips , srcIP )
2020-02-25 16:06:29 +00:00
}
2020-06-28 18:58:21 +01:00
func updateSig ( last * string , v interface { } ) ( changed bool ) {
sig := deepprint . Hash ( v )
if * last != sig {
* last = sig
return true
2020-05-08 02:07:13 +01:00
}
2020-06-28 18:58:21 +01:00
return false
2020-05-08 02:07:13 +01:00
}
2020-05-12 08:08:52 +01:00
func ( e * userspaceEngine ) Reconfig ( cfg * wgcfg . Config , routerCfg * router . Config ) error {
2020-05-31 07:36:57 +01:00
if routerCfg == nil {
panic ( "routerCfg must not be nil" )
}
2020-06-09 19:00:48 +01:00
localAddrs := map [ packet . IP ] bool { }
for _ , addr := range routerCfg . LocalAddrs {
// TODO: ipv6
if ! addr . IP . Is4 ( ) {
continue
}
2020-07-14 14:12:00 +01:00
localAddrs [ packet . IPFromNetaddr ( addr . IP ) ] = true
2020-06-09 19:00:48 +01:00
}
e . localAddrs . Store ( localAddrs )
2020-02-05 22:16:58 +00:00
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
2020-04-18 16:48:01 +01:00
peerSet := make ( map [ key . Public ] struct { } , len ( cfg . Peers ) )
2020-02-25 16:06:29 +00:00
e . mu . Lock ( )
2020-04-10 16:22:13 +01:00
e . peerSequence = e . peerSequence [ : 0 ]
for _ , p := range cfg . Peers {
e . peerSequence = append ( e . peerSequence , p . PublicKey )
2020-04-18 16:48:01 +01:00
peerSet [ key . Public ( p . PublicKey ) ] = struct { } { }
2020-02-05 22:16:58 +00:00
}
2020-02-25 16:06:29 +00:00
e . mu . Unlock ( )
2020-02-05 22:16:58 +00:00
2020-07-14 14:12:00 +01:00
// If the only nameserver is quad 100 (Magic DNS), set up the resolver appropriately.
if len ( routerCfg . Nameservers ) == 1 && routerCfg . Nameservers [ 0 ] == packet . IP ( magicDNSIP ) . Netaddr ( ) {
// TODO(dmytro): plumb dnsReadConfig here instead of hardcoding this.
e . resolver . SetNameservers ( [ ] string { "8.8.8.8:53" } )
routerCfg . Domains = append ( [ ] string { magicDNSDomain } , routerCfg . Domains ... )
}
2020-06-28 18:58:21 +01:00
engineChanged := updateSig ( & e . lastEngineSig , cfg )
routerChanged := updateSig ( & e . lastRouterSig , routerCfg )
2020-05-31 07:37:58 +01:00
if ! engineChanged && ! routerChanged {
2020-04-10 16:42:34 +01:00
return ErrNoChanges
2020-02-05 22:16:58 +00:00
}
2020-05-31 07:37:58 +01:00
e . lastCfg = cfg . Copy ( )
2020-04-10 16:42:34 +01:00
2020-05-31 07:37:58 +01:00
if engineChanged {
2020-06-19 06:07:20 +01:00
e . logf ( "wgengine: Reconfig: configuring userspace wireguard config" )
2020-05-31 07:37:58 +01:00
// Tell magicsock about the new (or initial) private key
// (which is needed by DERP) before wgdev gets it, as wgdev
// will start trying to handshake, which we want to be able to
// go over DERP.
if err := e . magicConn . SetPrivateKey ( cfg . PrivateKey ) ; err != nil {
e . logf ( "wgengine: Reconfig: SetPrivateKey: %v" , err )
}
2020-02-28 19:13:28 +00:00
2020-05-31 07:37:58 +01:00
if err := e . wgdev . Reconfig ( cfg ) ; err != nil {
e . logf ( "wgdev.Reconfig: %v" , err )
return err
}
2020-02-05 22:16:58 +00:00
2020-05-31 07:37:58 +01:00
e . magicConn . UpdatePeers ( peerSet )
}
2020-04-18 16:48:01 +01:00
2020-05-31 07:37:58 +01:00
if routerChanged {
2020-06-19 06:07:20 +01:00
e . logf ( "wgengine: Reconfig: configuring router" )
2020-05-31 07:37:58 +01:00
if err := e . router . Set ( routerCfg ) ; err != nil {
return err
}
2020-02-05 22:16:58 +00:00
}
2020-03-02 22:54:57 +00:00
2020-04-10 16:42:34 +01:00
e . logf ( "wgengine: Reconfig done" )
2020-03-02 22:54:57 +00:00
return nil
2020-02-05 22:16:58 +00:00
}
2020-03-25 07:47:55 +00:00
func ( e * userspaceEngine ) GetFilter ( ) * filter . Filter {
2020-05-13 14:16:17 +01:00
return e . tundev . GetFilter ( )
2020-03-25 07:47:55 +00:00
}
2020-02-05 22:16:58 +00:00
func ( e * userspaceEngine ) SetFilter ( filt * filter . Filter ) {
2020-05-13 14:16:17 +01:00
e . tundev . SetFilter ( filt )
2020-02-05 22:16:58 +00:00
}
2020-06-08 23:19:26 +01:00
// SetDNSMap hands dm to the engine's DNS resolver.
func (e *userspaceEngine) SetDNSMap(dm *tsdns.Map) {
	resolver := e.resolver
	resolver.SetMap(dm)
}
2020-02-05 22:16:58 +00:00
func ( e * userspaceEngine ) SetStatusCallback ( cb StatusCallback ) {
2020-02-28 17:32:06 +00:00
e . mu . Lock ( )
defer e . mu . Unlock ( )
2020-02-05 22:16:58 +00:00
e . statusCallback = cb
}
2020-02-28 17:32:06 +00:00
// getStatusCallback returns the currently registered status callback, read
// under e.mu. The result is nil if none has been set.
func (e *userspaceEngine) getStatusCallback() StatusCallback {
	e.mu.Lock()
	cb := e.statusCallback
	e.mu.Unlock()
	return cb
}
2020-04-08 16:42:38 +01:00
// TODO: this function returns an error but it's always nil, and when
// there's actually a problem it just calls log.Fatal. Why?
2020-02-05 22:16:58 +00:00
func ( e * userspaceEngine ) getStatus ( ) ( * Status , error ) {
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
2020-05-17 17:51:38 +01:00
e . mu . Lock ( )
closing := e . closing
e . mu . Unlock ( )
if closing {
return nil , errors . New ( "engine closing; no status" )
}
2020-02-05 22:16:58 +00:00
if e . wgdev == nil {
// RequestStatus was invoked before the wgengine has
// finished initializing. This can happen when wgegine
// provides a callback to magicsock for endpoint
// updates that calls RequestStatus.
return nil , nil
}
2020-04-08 16:42:38 +01:00
// lineLen is the max UAPI line we expect. The longest I see is
// len("preshared_key=")+64 hex+"\n" == 79. Add some slop.
const lineLen = 100
pr , pw := io . Pipe ( )
errc := make ( chan error , 1 )
go func ( ) {
defer pw . Close ( )
bw := bufio . NewWriterSize ( pw , lineLen )
// TODO(apenwarr): get rid of silly uapi stuff for in-process comms
// FIXME: get notified of status changes instead of polling.
2020-07-16 04:30:45 +01:00
filter := device . IPCGetFilter {
// The allowed_ips are somewhat expensive to compute and they're
// unused below; request that they not be sent instead.
FilterAllowedIPs : true ,
}
if err := e . wgdev . IpcGetOperationFiltered ( bw , filter ) ; err != nil {
2020-04-08 16:42:38 +01:00
errc <- fmt . Errorf ( "IpcGetOperation: %w" , err )
return
}
errc <- bw . Flush ( )
} ( )
2020-02-05 22:16:58 +00:00
pp := make ( map [ wgcfg . Key ] * PeerStatus )
2020-04-08 16:42:38 +01:00
p := & PeerStatus { }
2020-02-05 22:16:58 +00:00
var hst1 , hst2 , n int64
var err error
2020-04-08 16:42:38 +01:00
bs := bufio . NewScanner ( pr )
bs . Buffer ( make ( [ ] byte , lineLen ) , lineLen )
for bs . Scan ( ) {
line := bs . Bytes ( )
k := line
var v mem . RO
if i := bytes . IndexByte ( line , '=' ) ; i != - 1 {
k = line [ : i ]
v = mem . B ( line [ i + 1 : ] )
2020-02-05 22:16:58 +00:00
}
2020-04-08 16:42:38 +01:00
switch string ( k ) {
2020-02-05 22:16:58 +00:00
case "public_key" :
2020-04-08 16:42:38 +01:00
pk , err := key . NewPublicFromHexMem ( v )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-04-11 16:35:34 +01:00
log . Fatalf ( "IpcGetOperation: invalid key %#v" , v )
2020-02-05 22:16:58 +00:00
}
p = & PeerStatus { }
2020-04-08 16:42:38 +01:00
pp [ wgcfg . Key ( pk ) ] = p
2020-02-05 22:16:58 +00:00
2020-02-11 03:04:52 +00:00
key := tailcfg . NodeKey ( pk )
2020-02-05 22:16:58 +00:00
p . NodeKey = key
case "rx_bytes" :
2020-06-01 04:22:46 +01:00
n , err = mem . ParseInt ( v , 10 , 64 )
2020-02-05 22:16:58 +00:00
p . RxBytes = ByteCount ( n )
if err != nil {
2020-04-11 16:35:34 +01:00
log . Fatalf ( "IpcGetOperation: rx_bytes invalid: %#v" , line )
2020-02-05 22:16:58 +00:00
}
case "tx_bytes" :
2020-06-01 04:22:46 +01:00
n , err = mem . ParseInt ( v , 10 , 64 )
2020-02-05 22:16:58 +00:00
p . TxBytes = ByteCount ( n )
if err != nil {
2020-04-11 16:35:34 +01:00
log . Fatalf ( "IpcGetOperation: tx_bytes invalid: %#v" , line )
2020-02-05 22:16:58 +00:00
}
case "last_handshake_time_sec" :
2020-06-01 04:22:46 +01:00
hst1 , err = mem . ParseInt ( v , 10 , 64 )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-04-11 16:35:34 +01:00
log . Fatalf ( "IpcGetOperation: hst1 invalid: %#v" , line )
2020-02-05 22:16:58 +00:00
}
case "last_handshake_time_nsec" :
2020-06-01 04:22:46 +01:00
hst2 , err = mem . ParseInt ( v , 10 , 64 )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-04-11 16:35:34 +01:00
log . Fatalf ( "IpcGetOperation: hst2 invalid: %#v" , line )
2020-02-05 22:16:58 +00:00
}
if hst1 != 0 || hst2 != 0 {
p . LastHandshake = time . Unix ( hst1 , hst2 )
} // else leave at time.IsZero()
}
}
2020-04-08 16:42:38 +01:00
if err := bs . Err ( ) ; err != nil {
log . Fatalf ( "reading IpcGetOperation output: %v" , err )
}
if err := <- errc ; err != nil {
log . Fatalf ( "IpcGetOperation: %v" , err )
}
2020-02-05 22:16:58 +00:00
e . mu . Lock ( )
defer e . mu . Unlock ( )
var peers [ ] PeerStatus
for _ , pk := range e . peerSequence {
p := pp [ pk ]
if p == nil {
p = & PeerStatus { }
}
peers = append ( peers , * p )
}
if len ( pp ) != len ( e . peerSequence ) {
2020-04-11 16:35:34 +01:00
e . logf ( "wg status returned %v peers, expected %v" , len ( pp ) , len ( e . peerSequence ) )
2020-02-05 22:16:58 +00:00
}
return & Status {
LocalAddrs : append ( [ ] string ( nil ) , e . endpoints ... ) ,
Peers : peers ,
2020-03-19 06:55:14 +00:00
DERPs : e . magicConn . DERPs ( ) ,
2020-02-05 22:16:58 +00:00
} , nil
}
func ( e * userspaceEngine ) RequestStatus ( ) {
// This is slightly tricky. e.getStatus() can theoretically get
// blocked inside wireguard for a while, and RequestStatus() is
// sometimes called from a goroutine, so we don't want a lot of
// them hanging around. On the other hand, requesting multiple
// status updates simultaneously is pointless anyway; they will
// all say the same thing.
// Enqueue at most one request. If one is in progress already, this
// adds one more to the queue. If one has been requested but not
// started, it is a no-op.
select {
case e . reqCh <- struct { } { } :
default :
}
// Dequeue at most one request. Another thread may have already
// dequeued the request we enqueued above, which is fine, since the
// information is guaranteed to be at least as recent as the current
// call to RequestStatus().
select {
case <- e . reqCh :
s , err := e . getStatus ( )
if s == nil && err == nil {
2020-04-11 16:35:34 +01:00
e . logf ( "RequestStatus: weird: both s and err are nil" )
2020-02-05 22:16:58 +00:00
return
}
2020-02-28 17:32:06 +00:00
if cb := e . getStatusCallback ( ) ; cb != nil {
cb ( s , err )
2020-02-05 22:16:58 +00:00
}
default :
}
}
func ( e * userspaceEngine ) Close ( ) {
2020-05-29 06:38:26 +01:00
var pingers [ ] * pinger
2020-02-25 16:06:29 +00:00
e . mu . Lock ( )
2020-05-17 17:51:38 +01:00
if e . closing {
e . mu . Unlock ( )
return
}
e . closing = true
2020-05-29 06:38:26 +01:00
for _ , pinger := range e . pingers {
pingers = append ( pingers , pinger )
2020-02-25 16:06:29 +00:00
}
e . mu . Unlock ( )
2020-02-20 17:47:33 +00:00
r := bufio . NewReader ( strings . NewReader ( "" ) )
e . wgdev . IpcSetOperation ( r )
2020-07-07 20:25:32 +01:00
e . resolver . Close ( )
2020-07-03 08:00:04 +01:00
e . magicConn . Close ( )
2020-02-17 17:00:38 +00:00
e . linkMon . Close ( )
2020-02-05 22:16:58 +00:00
e . router . Close ( )
2020-07-03 08:00:04 +01:00
e . wgdev . Close ( )
2020-05-29 06:38:26 +01:00
// Shut down pingers after tundev is closed (by e.wgdev.Close) so the
// synchronous close does not get stuck on InjectOutbound.
for _ , pinger := range pingers {
pinger . close ( )
}
2020-02-05 22:16:58 +00:00
close ( e . waitCh )
}
// Wait blocks until a receive on e.waitCh completes; Close closes that
// channel once shutdown has finished.
func (e *userspaceEngine) Wait() {
	<-e.waitCh
}
2020-03-13 03:10:11 +00:00
// setLinkState records st as the engine's current link state and
// reports whether it differs from the previously stored one. A nil st
// is ignored and reported as unchanged. The very first non-nil state
// always counts as a change.
func (e *userspaceEngine) setLinkState(st *interfaces.State) (changed bool) {
	if st == nil {
		return false
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	prev := e.linkState
	e.linkState = st
	return prev == nil || !st.Equal(prev)
}
2020-02-05 22:16:58 +00:00
func ( e * userspaceEngine ) LinkChange ( isExpensive bool ) {
2020-03-13 03:10:11 +00:00
cur , err := getLinkState ( )
if err != nil {
e . logf ( "LinkChange: interfaces.GetState: %v" , err )
return
}
2020-04-10 03:10:55 +01:00
cur . IsExpensive = isExpensive
2020-03-13 03:10:11 +00:00
needRebind := e . setLinkState ( cur )
e . logf ( "LinkChange(isExpensive=%v); needsRebind=%v" , isExpensive , needRebind )
why := "link-change-minor"
if needRebind {
why = "link-change-major"
e . magicConn . Rebind ( )
}
e . magicConn . ReSTUN ( why )
2020-02-05 22:16:58 +00:00
}
2020-03-04 06:21:56 +00:00
2020-03-13 03:10:11 +00:00
// getLinkState returns the result of interfaces.GetState after
// calling RemoveTailscaleInterfaces on it. The error from GetState is
// passed through unchanged, alongside whatever state was returned.
func getLinkState() (*interfaces.State, error) {
	st, err := interfaces.GetState()
	if st == nil {
		return st, err
	}
	st.RemoveTailscaleInterfaces()
	return st, err
}
2020-03-04 06:21:56 +00:00
// SetNetInfoCallback passes cb straight through to
// e.magicConn.SetNetInfoCallback.
func (e *userspaceEngine) SetNetInfoCallback(cb NetInfoCallback) {
	e.magicConn.SetNetInfoCallback(cb)
}
2020-03-04 20:21:40 +00:00
2020-05-17 17:51:38 +01:00
func ( e * userspaceEngine ) SetDERPMap ( dm * tailcfg . DERPMap ) {
e . magicConn . SetDERPMap ( dm )
2020-03-04 20:21:40 +00:00
}
2020-03-26 05:57:46 +00:00
2020-06-25 19:04:52 +01:00
// SetNetworkMap passes nm straight through to
// e.magicConn.SetNetworkMap.
func (e *userspaceEngine) SetNetworkMap(nm *controlclient.NetworkMap) {
	e.magicConn.SetNetworkMap(nm)
}
2020-07-06 20:10:39 +01:00
func ( e * userspaceEngine ) DiscoPublicKey ( ) tailcfg . DiscoKey {
return e . magicConn . DiscoPublicKey ( )
2020-06-19 20:06:49 +01:00
}
2020-03-26 05:57:46 +00:00
// UpdateStatus adds the engine's view of each peer to sb and then lets
// e.magicConn add its own details.
//
// For every peer in the status returned by e.getStatus, a PeerStatus
// carrying the byte counters and last handshake time is added, marked
// InEngine. If the status cannot be obtained, the problem is logged
// and sb is left untouched.
func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
	st, err := e.getStatus()
	if err != nil {
		e.logf("wgengine: getStatus: %v", err)
		return
	}
	if st == nil {
		// RequestStatus treats a (nil, nil) result from getStatus as
		// possible, so guard here too rather than dereferencing a nil
		// status below.
		e.logf("wgengine: getStatus: no status available")
		return
	}

	for _, ps := range st.Peers {
		sb.AddPeer(key.Public(ps.NodeKey), &ipnstate.PeerStatus{
			RxBytes:       int64(ps.RxBytes),
			TxBytes:       int64(ps.TxBytes),
			LastHandshake: ps.LastHandshake,
			InEngine:      true,
		})
	}

	e.magicConn.UpdateStatus(sb)
}
2020-04-10 21:44:08 +01:00
// diagnoseTUNFailure is called when tun.CreateTUN fails. It pokes
// around the system and logs diagnostic information that might help
// explain the failure. Since TUN has already failed and the program is
// about to exit anyway, it is fine to log a lot.
//
// Diagnostics currently exist only for Linux; on any other OS a single
// line saying so is logged.
func diagnoseTUNFailure(logf logger.Logf) {
	if runtime.GOOS == "linux" {
		diagnoseLinuxTUNFailure(logf)
		return
	}
	logf("no TUN failure diagnostics for OS %q", runtime.GOOS)
}
func diagnoseLinuxTUNFailure ( logf logger . Logf ) {
kernel , err := exec . Command ( "uname" , "-r" ) . Output ( )
kernel = bytes . TrimSpace ( kernel )
if err != nil {
logf ( "no TUN, and failed to look up kernel version: %v" , err )
return
}
logf ( "Linux kernel version: %s" , kernel )
modprobeOut , err := exec . Command ( "/sbin/modprobe" , "tun" ) . CombinedOutput ( )
if err == nil {
logf ( "'modprobe tun' successful" )
// Either tun is currently loaded, or it's statically
// compiled into the kernel (which modprobe checks
// with /lib/modules/$(uname -r)/modules.builtin)
//
// So if there's a problem at this point, it's
// probably because /dev/net/tun doesn't exist.
const dev = "/dev/net/tun"
if fi , err := os . Stat ( dev ) ; err != nil {
logf ( "tun module loaded in kernel, but %s does not exist" , dev )
} else {
logf ( "%s: %v" , dev , fi . Mode ( ) )
}
// We failed to find why it failed. Just let our
// caller report the error it got from wireguard-go.
return
}
logf ( "is CONFIG_TUN enabled in your kernel? `modprobe tun` failed with: %s" , modprobeOut )
distro := linuxDistro ( )
switch distro {
case "debian" :
dpkgOut , err := exec . Command ( "dpkg" , "-S" , "kernel/drivers/net/tun.ko" ) . CombinedOutput ( )
if len ( bytes . TrimSpace ( dpkgOut ) ) == 0 || err != nil {
logf ( "tun module not loaded nor found on disk" )
return
}
if ! bytes . Contains ( dpkgOut , kernel ) {
logf ( "kernel/drivers/net/tun.ko found on disk, but not for current kernel; are you in middle of a system update and haven't rebooted? found: %s" , dpkgOut )
}
2020-04-13 17:22:08 +01:00
case "arch" :
findOut , err := exec . Command ( "find" , "/lib/modules/" , "-path" , "*/net/tun.ko*" ) . CombinedOutput ( )
if len ( bytes . TrimSpace ( findOut ) ) == 0 || err != nil {
logf ( "tun module not loaded nor found on disk" )
return
}
if ! bytes . Contains ( findOut , kernel ) {
logf ( "kernel/drivers/net/tun.ko found on disk, but not for current kernel; are you in middle of a system update and haven't rebooted? found: %s" , findOut )
}
2020-04-10 21:44:08 +01:00
}
}
// linuxDistro guesses the Linux distribution family by probing for
// well-known marker files. It returns "debian" if /etc/debian_version
// can be stat'ed, otherwise "arch" if /etc/arch-release can be, and
// the empty string if neither is found.
func linuxDistro() string {
	// Checked in order; the first marker that exists wins.
	markers := []struct {
		path   string
		distro string
	}{
		{"/etc/debian_version", "debian"},
		{"/etc/arch-release", "arch"},
	}
	for _, m := range markers {
		if _, err := os.Stat(m.path); err == nil {
			return m.distro
		}
	}
	return ""
}