control/controlclient: cache control key

Caching the control server's key lets us keep communicating with control
after moving to a network that does TLS MiTM, which would otherwise prevent
us from fetching the Noise key and establishing a connection.

Updates #3198 (sorta)

Change-Id: I52caf5079de744874a2bdd0c9ffb9e8f087ff8e0
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
Andrew Dunham 2023-03-04 00:15:50 -05:00
parent 62cf83eb92
commit 98ab538b1b
3 changed files with 81 additions and 34 deletions

View File

@ -354,6 +354,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/types/structs from tailscale.com/control/controlclient+
tailscale.com/types/tkatype from tailscale.com/tka+
tailscale.com/types/views from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cache from tailscale.com/control/controlclient+
tailscale.com/util/clientmetric from tailscale.com/control/controlclient+
tailscale.com/util/cloudenv from tailscale.com/net/dns/resolver+
tailscale.com/util/cmpver from tailscale.com/net/dns+

View File

@ -52,6 +52,7 @@ import (
"tailscale.com/types/persist"
"tailscale.com/types/ptr"
"tailscale.com/types/tkatype"
"tailscale.com/util/cache"
"tailscale.com/util/clientmetric"
"tailscale.com/util/multierr"
"tailscale.com/util/singleflight"
@ -82,6 +83,9 @@ type Direct struct {
dialPlan ControlDialPlanner // can be nil
controlKeyMu sync.Mutex // guards controlKeyCache
controlKeyCache cache.Cache[string, *tailcfg.OverTLSPublicKeyResponse]
mu sync.Mutex // mutex guards the following fields
serverKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key
serverNoiseKey key.MachinePublic
@ -151,6 +155,10 @@ type Options struct {
// If we receive a new DialPlan from the server, this value will be
// updated.
DialPlan ControlDialPlanner
// ControlKeyCache caches Noise keys returned from control; if nil, no
// cache will be used.
ControlKeyCache cache.Cache[string, *tailcfg.OverTLSPublicKeyResponse]
}
// ControlDialPlanner is the interface optionally supplied when creating a
@ -264,6 +272,11 @@ func NewDirect(opts Options) (*Direct, error) {
httpc = &http.Client{Transport: tr}
}
ckcache := opts.ControlKeyCache
if ckcache == nil {
ckcache = cache.None[string, *tailcfg.OverTLSPublicKeyResponse]{}
}
c := &Direct{
httpc: httpc,
controlKnobs: opts.ControlKnobs,
@ -286,6 +299,7 @@ func NewDirect(opts Options) (*Direct, error) {
dialer: opts.Dialer,
dnsCache: dnsCache,
dialPlan: opts.DialPlan,
controlKeyCache: ckcache,
}
if opts.Hostinfo == nil {
c.SetHostinfo(hostinfo.New())
@ -492,11 +506,12 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
c.logf("doLogin(regen=%v, hasUrl=%v)", regen, opt.URL != "")
if serverKey.IsZero() {
keys, err := loadServerPubKeys(ctx, c.httpc, c.serverURL)
keys, cached, err := c.loadServerPubKeys(ctx)
if err != nil {
c.logf("error fetching control server key (serverURL=%q): %v", c.serverURL, err)
return regen, opt.URL, nil, err
}
c.logf("control server key from %s: ts2021=%s, legacy=%v", c.serverURL, keys.PublicKey.ShortString(), keys.LegacyPublicKey.ShortString())
c.logf("control server key from %s: cached=%v ts2021=%s, legacy=%v", c.serverURL, cached, keys.PublicKey.ShortString(), keys.LegacyPublicKey.ShortString())
c.mu.Lock()
c.serverKey = keys.LegacyPublicKey
@ -1260,39 +1275,54 @@ func encode(v any, serverKey, serverNoiseKey key.MachinePublic, mkey key.Machine
return mkey.SealTo(serverKey, b), nil
}
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// The free function below appears to be the pre-change implementation that
// the commit removes; the method declared further down appears to be its
// replacement. The two are interleaved line by line — confirm against the
// real file before relying on the exact ordering shown here.
//
// Free-function form: issues a GET for "<serverURL>/key?v=<capability
// version>" using httpc and decodes the response body into a
// tailcfg.OverTLSPublicKeyResponse.
func loadServerPubKeys(ctx context.Context, httpc *http.Client, serverURL string) (*tailcfg.OverTLSPublicKeyResponse, error) {
keyURL := fmt.Sprintf("%v/key?v=%d", serverURL, tailcfg.CurrentCapabilityVersion)
req, err := http.NewRequestWithContext(ctx, "GET", keyURL, nil)
if err != nil {
return nil, fmt.Errorf("create control key request: %v", err)
}
res, err := httpc.Do(req)
if err != nil {
return nil, fmt.Errorf("fetch control key: %v", err)
}
defer res.Body.Close()
// Read at most 64 KiB (64<<10 bytes) of the body.
b, err := io.ReadAll(io.LimitReader(res.Body, 64<<10))
if err != nil {
return nil, fmt.Errorf("fetch control key response: %v", err)
}
// The status code is only checked after the body has been read.
if res.StatusCode != 200 {
return nil, fmt.Errorf("fetch control key: %d", res.StatusCode)
}
var out tailcfg.OverTLSPublicKeyResponse
// First try the JSON format; on success we are done.
jsonErr := json.Unmarshal(b, &out)
if jsonErr == nil {
return &out, nil
}
// loadServerPubKeys returns the control server's public keys for
// c.serverURL, looking them up through c.controlKeyCache. c.controlKeyMu is
// held for the entire call, including while the cache's fill function runs.
//
// The cache key is the full key URL, which includes the capability version.
// The fill function passed to the cache performs the HTTP GET via c.httpc and
// parses the response.
//
// cached starts out true and is set to false as soon as the fill function is
// invoked, so it reports whether the fill function did not run.
// NOTE(review): if the cache serves an expired entry after a failed fetch
// (see the ServeExpired comment inside the fill function), cached will be
// false even though the returned value came from the cache — confirm this is
// the intended meaning of the flag.
func (c *Direct) loadServerPubKeys(ctx context.Context) (ret *tailcfg.OverTLSPublicKeyResponse, cached bool, err error) {
c.controlKeyMu.Lock()
defer c.controlKeyMu.Unlock()
cached = true
// NOTE(review): the lines from here through "return &out, nil" look like the
// tail of the removed free function (two-value returns), interleaved here by
// the diff rendering.
// Some old control servers might not be updated to send the new format.
// Accept the old pre-JSON format too.
// Reset out before the fallback parse; presumably this discards anything a
// failed Unmarshal may have partially filled in.
out = tailcfg.OverTLSPublicKeyResponse{}
k, err := key.ParseMachinePublicUntyped(mem.B(b))
if err != nil {
// Report both the JSON error and the legacy-format parse error.
return nil, multierr.New(jsonErr, err)
}
out.LegacyPublicKey = k
return &out, nil
keyURL := fmt.Sprintf("%v/key?v=%d", c.serverURL, tailcfg.CurrentCapabilityVersion)
// ret and the named result err are assigned only here, from the cache's Get.
ret, err = c.controlKeyCache.Get(keyURL, func() (*tailcfg.OverTLSPublicKeyResponse, time.Time, error) {
// The fill function ran, so the value is not being served straight from
// the cache.
cached = false
// err below is declared with := and is local to this closure; it does not
// write to the enclosing function's named err result.
req, err := http.NewRequestWithContext(ctx, "GET", keyURL, nil)
if err != nil {
return nil, time.Time{}, fmt.Errorf("create control key request: %v", err)
}
res, err := c.httpc.Do(req)
if err != nil {
return nil, time.Time{}, fmt.Errorf("fetch control key: %v", err)
}
defer res.Body.Close()
// Read at most 64 KiB (64<<10 bytes) of the body.
b, err := io.ReadAll(io.LimitReader(res.Body, 64<<10))
if err != nil {
return nil, time.Time{}, fmt.Errorf("fetch control key response: %v", err)
}
// The status code is only checked after the body has been read.
if res.StatusCode != 200 {
return nil, time.Time{}, fmt.Errorf("fetch control key: %d", res.StatusCode)
}
// Cache keys for one minute at most, after which we'll
// re-fetch from the control server. However, if this cache has
// ServeExpired enabled, then we'll serve the expired key if
// the request to fetch a key fails.
expiry := c.clock.Now().Add(1 * time.Minute)
var out tailcfg.OverTLSPublicKeyResponse
// First try the JSON format; on success return it with the expiry above.
jsonErr := json.Unmarshal(b, &out)
if jsonErr == nil {
return &out, expiry, nil
}
// Some old control servers might not be updated to send the new format.
// Accept the old pre-JSON format too.
// Reset out before the fallback parse; presumably this discards anything a
// failed Unmarshal may have partially filled in.
out = tailcfg.OverTLSPublicKeyResponse{}
k, err := key.ParseMachinePublicUntyped(mem.B(b))
if err != nil {
// Report both the JSON error and the legacy-format parse error; error
// returns carry a zero time.Time as the expiry.
return nil, time.Time{}, multierr.New(jsonErr, err)
}
out.LegacyPublicKey = k
return &out, expiry, nil
})
// Naked return: yields the named results ret, cached and err.
return
}
// DevKnob contains temporary internal-only debug knobs.

View File

@ -84,6 +84,7 @@ import (
"tailscale.com/types/preftype"
"tailscale.com/types/ptr"
"tailscale.com/types/views"
"tailscale.com/util/cache"
"tailscale.com/util/deephash"
"tailscale.com/util/dnsname"
"tailscale.com/util/mak"
@ -278,6 +279,8 @@ type LocalBackend struct {
// to use, unless overridden locally.
capForcedNetfilter string
controlKeyCache cache.Cache[string, *tailcfg.OverTLSPublicKeyResponse]
// ServeConfig fields. (also guarded by mu)
lastServeConfJSON mem.RO // last JSON that was parsed into serveConfig
serveConfig ipn.ServeConfigView // or !Valid if none
@ -388,6 +391,14 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
clock: clock,
selfUpdateProgress: make([]ipnstate.UpdateProgress, 0),
lastSelfUpdateState: ipnstate.UpdateFinished,
// NOTE: if we ever decide to cache this somewhere other than
// in-memory, ensure we're handling profile switches/shutdowns.
controlKeyCache: &cache.Single[string, *tailcfg.OverTLSPublicKeyResponse]{
// If we can't reach the control server, allow returning
// an expired value from the cache.
ServeExpired: true,
},
}
netMon := sys.NetMon.Get()
@ -1796,6 +1807,11 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
DialPlan: &b.dialPlan, // pointer because it can't be copied
ControlKnobs: b.sys.ControlKnobs(),
// Cache control key in-memory; this helps when moving to a
// network that does TLS MiTM, and allows us to continue using
// the previously-cached control key.
ControlKeyCache: b.controlKeyCache,
// Don't warn about broken Linux IP forwarding when
// netstack is being used.
SkipIPForwardingCheck: isNetstack,