net/dnsfallback: cache most recent DERP map on disk (#5545)

This is especially helpful as we launch newer DERPs over time, and older
clients have progressively out-of-date static DERP maps baked in. After
this, as long as the client has successfully connected once, it'll cache
the most recent DERP map it knows about.

Resolves an in-code comment from @bradfitz

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
Andrew Dunham 2022-09-05 14:36:30 -04:00 committed by GitHub
parent 060ecb010f
commit b8596f2a2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 271 additions and 6 deletions

View File

@ -34,6 +34,7 @@ import (
"tailscale.com/ipn/ipnstate"
"tailscale.com/ipn/policy"
"tailscale.com/net/dns"
"tailscale.com/net/dnsfallback"
"tailscale.com/net/interfaces"
"tailscale.com/net/netutil"
"tailscale.com/net/tsaddr"
@ -731,6 +732,9 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) {
b.e.SetNetworkMap(st.NetMap)
b.e.SetDERPMap(st.NetMap.DERPMap)
// Update our cached DERP map
dnsfallback.UpdateCache(st.NetMap.DERPMap)
b.send(ipn.Notify{NetMap: st.NetMap})
}
if st.URL != "" {

View File

@ -37,6 +37,7 @@ import (
"tailscale.com/ipn/ipnlocal"
"tailscale.com/ipn/localapi"
"tailscale.com/logtail/backoff"
"tailscale.com/net/dnsfallback"
"tailscale.com/net/netstat"
"tailscale.com/net/netutil"
"tailscale.com/net/tsdial"
@ -786,6 +787,8 @@ func New(logf logger.Logf, logid string, store ipn.StateStore, eng wgengine.Engi
b.SetTailnetKeyAuthority(authority, storage)
logf("tka initialized at head %x", authority.Head())
}
dnsfallback.SetCachePath(filepath.Join(root, "derpmap.cached.json"))
} else {
logf("network-lock unavailable; no state directory")
}

View File

@ -20,8 +20,12 @@ import (
"net/http"
"net/netip"
"net/url"
"os"
"reflect"
"sync/atomic"
"time"
"tailscale.com/atomicfile"
"tailscale.com/net/netns"
"tailscale.com/net/tlsdial"
"tailscale.com/net/tshttpproxy"
@ -39,6 +43,7 @@ func Lookup(ctx context.Context, host string) ([]netip.Addr, error) {
}
dm := getDERPMap()
var cands4, cands6 []nameIP
for _, dr := range dm.Regions {
for _, n := range dr.Nodes {
@ -128,12 +133,45 @@ type dnsMap map[string][]netip.Addr
// getDERPMap returns the DERP map used for fallback DNS lookups. The DERP
// servers also run a fallback DNS server.
//
// The result starts from the DERP map compiled into this binary (see
// getStaticDERPMap) and is then extended with whatever the cached DERP map
// knows about that the static map does not: whole regions whose ID is
// missing, and, for regions present in both, nodes whose HostName is missing.
// Entries already present in the static map are never replaced, so a bad
// cached map cannot override the built-in fallbacks.
//
// Regions taken wholesale from the cache are shared with the cached map, not
// copied, so callers must treat the returned map as read-only.
func getDERPMap() *tailcfg.DERPMap {
	dm := getStaticDERPMap()

	// TODO(andrew): should we expect OmitDefaultRegions here? We're not
	// forwarding traffic, just resolving DNS, so maybe we can ignore that
	// value anyway?
	cached := cachedDERPMap.Load()
	if cached == nil {
		return dm
	}

	// Assigning into a nil map panics, so make sure there is somewhere to
	// put cached regions even if the static map decoded without any.
	if dm.Regions == nil && len(cached.Regions) > 0 {
		dm.Regions = make(map[int]*tailcfg.DERPRegion, len(cached.Regions))
	}

	for id, region := range cached.Regions {
		// The cache is decoded from JSON, where a region may be "null".
		// Skip those rather than dereferencing them here or handing a nil
		// region to whoever iterates the result.
		if region == nil {
			continue
		}

		dr := dm.Regions[id]
		if dr == nil {
			// Region unknown to the static map: take the cached one as-is.
			dm.Regions[id] = region
			continue
		}

		// Add any nodes that we don't already have, keyed by hostname so
		// that a cached node can never shadow a baked-in one.
		seen := make(map[string]bool, len(dr.Nodes))
		for _, n := range dr.Nodes {
			if n != nil {
				seen[n.HostName] = true
			}
		}
		for _, n := range region.Nodes {
			if n == nil || seen[n.HostName] {
				continue
			}
			dr.Nodes = append(dr.Nodes, n)
		}
	}
	return dm
}
// getStaticDERPMap returns the DERP map that was compiled into this binary.
func getStaticDERPMap() *tailcfg.DERPMap {
dm := new(tailcfg.DERPMap)
if err := json.Unmarshal(staticDERPMapJSON, dm); err != nil {
panic(err)
@ -143,3 +181,67 @@ func getDERPMap() *tailcfg.DERPMap {
// staticDERPMapJSON holds the contents of dns-fallback-servers.json, embedded
// into the binary by the go:embed directive below. getStaticDERPMap decodes
// it into a tailcfg.DERPMap.
//
//go:embed dns-fallback-servers.json
var staticDERPMapJSON []byte

// cachedDERPMap is the most recent cached DERP map: either the one loaded
// from the on-disk cache by SetCachePath or the one last passed to
// UpdateCache. Load returns nil until one of those has stored a map.
var cachedDERPMap atomic.Pointer[tailcfg.DERPMap]

// cachePath is the path to the DERP map cache file, set by SetCachePath via
// ipnserver.New() if we have a state directory. When it is empty, UpdateCache
// skips writing the cache to disk.
var cachePath string
// UpdateCache records c as the most recent DERP map and, if a cache path has
// been configured with SetCachePath, writes it to disk as JSON.
//
// A nil c is ignored: it carries no DERP information, and storing it would
// replace a previously cached, usable map with the JSON value "null" both in
// memory and on disk. If c is deeply equal to the currently cached map,
// nothing is stored or written.
//
// Failures are logged and otherwise ignored; the cache is best-effort.
//
// The caller must not mutate 'c' after calling this function, since the
// pointer itself (not a copy) is retained as the in-memory cache.
func UpdateCache(c *tailcfg.DERPMap) {
	// Never let "no map" clobber the last known-good one.
	if c == nil {
		return
	}

	// Don't do anything if nothing changed.
	curr := cachedDERPMap.Load()
	if reflect.DeepEqual(curr, c) {
		return
	}

	d, err := json.Marshal(c)
	if err != nil {
		log.Printf("[v1] dnsfallback: UpdateCache error marshaling: %v", err)
		return
	}

	// Only store after we're confident this is at least valid JSON
	cachedDERPMap.Store(c)

	// Don't try writing if we don't have a cache path set; this can happen
	// when we don't have a state path (e.g. /var/lib/tailscale) configured.
	if cachePath != "" {
		err = atomicfile.WriteFile(cachePath, d, 0600)
		if err != nil {
			log.Printf("[v1] dnsfallback: UpdateCache error writing: %v", err)
			return
		}
	}
	log.Printf("[v2] dnsfallback: UpdateCache succeeded")
}
// SetCachePath records path as the location of the on-disk DERP map cache
// and then tries to prime the in-memory cache from it. When the file can be
// opened and decoded as JSON, the decoded map becomes the cached DERP map,
// which getDERPMap merges with the map baked into the binary. When it cannot,
// the problem is logged and the in-memory cache is left as it was; the path
// is still remembered so that later UpdateCache calls write to it.
//
// This function is not concurrency-safe and should be called before any
// calls to UpdateCache.
func SetCachePath(path string) {
	cachePath = path

	file, openErr := os.Open(path)
	if openErr != nil {
		log.Printf("[v1] dnsfallback: SetCachePath error reading %q: %v", path, openErr)
		return
	}
	defer file.Close()

	var loaded tailcfg.DERPMap
	if decodeErr := json.NewDecoder(file).Decode(&loaded); decodeErr != nil {
		log.Printf("[v1] dnsfallback: SetCachePath error decoding %q: %v", path, decodeErr)
		return
	}

	cachedDERPMap.Store(&loaded)
	log.Printf("[v2] dnsfallback: SetCachePath loaded cached DERP map")
}

View File

@ -4,7 +4,15 @@
package dnsfallback
import "testing"
import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"testing"
"tailscale.com/tailcfg"
)
func TestGetDERPMap(t *testing.T) {
dm := getDERPMap()
@ -15,3 +23,151 @@ func TestGetDERPMap(t *testing.T) {
t.Fatal("no regions")
}
}
func TestCache(t *testing.T) {
cacheFile := filepath.Join(t.TempDir(), "cache.json")
// Write initial cache value
initialCache := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
99: {
RegionID: 99,
RegionCode: "test",
RegionName: "Testville",
Nodes: []*tailcfg.DERPNode{{
Name: "99a",
RegionID: 99,
HostName: "derp99a.tailscale.com",
IPv4: "1.2.3.4",
}},
},
// Intentionally attempt to "overwrite" something
1: {
RegionID: 1,
RegionCode: "r1",
RegionName: "r1",
Nodes: []*tailcfg.DERPNode{{
Name: "1c",
RegionID: 1,
HostName: "derp1c.tailscale.com",
IPv4: "127.0.0.1",
IPv6: "::1",
}},
},
},
}
d, err := json.Marshal(initialCache)
if err != nil {
t.Fatal(err)
}
if err := os.WriteFile(cacheFile, d, 0666); err != nil {
t.Fatal(err)
}
// Clear any existing cached DERP map(s)
cachedDERPMap.Store(nil)
// Load the cache
SetCachePath(cacheFile)
if cm := cachedDERPMap.Load(); !reflect.DeepEqual(initialCache, cm) {
t.Fatalf("cached map was %+v; want %+v", cm, initialCache)
}
// Verify that our DERP map is merged with the cache.
dm := getDERPMap()
region, ok := dm.Regions[99]
if !ok {
t.Fatal("expected region 99")
}
if !reflect.DeepEqual(region, initialCache.Regions[99]) {
t.Fatalf("region 99: got %+v; want %+v", region, initialCache.Regions[99])
}
// Verify that our cache can't override a statically-baked-in DERP server.
n0 := dm.Regions[1].Nodes[0]
if n0.IPv4 == "127.0.0.1" || n0.IPv6 == "::1" {
t.Errorf("got %+v; expected no overwrite for node", n0)
}
// Also, make sure that the static DERP map still has the same first
// node as when this test was last written/updated; this ensures that
// we don't accidentally start allowing overwrites due to some of the
// test's assumptions changing out from underneath us as we update the
// JSON file of fallback servers.
if getStaticDERPMap().Regions[1].Nodes[0].HostName != "derp1c.tailscale.com" {
t.Errorf("DERP server has a different name; please update this test")
}
}
func TestCacheUnchanged(t *testing.T) {
cacheFile := filepath.Join(t.TempDir(), "cache.json")
// Write initial cache value
initialCache := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
99: {
RegionID: 99,
RegionCode: "test",
RegionName: "Testville",
Nodes: []*tailcfg.DERPNode{{
Name: "99a",
RegionID: 99,
HostName: "derp99a.tailscale.com",
IPv4: "1.2.3.4",
}},
},
},
}
d, err := json.Marshal(initialCache)
if err != nil {
t.Fatal(err)
}
if err := os.WriteFile(cacheFile, d, 0666); err != nil {
t.Fatal(err)
}
// Clear any existing cached DERP map(s)
cachedDERPMap.Store(nil)
// Load the cache
SetCachePath(cacheFile)
if cm := cachedDERPMap.Load(); !reflect.DeepEqual(initialCache, cm) {
t.Fatalf("cached map was %+v; want %+v", cm, initialCache)
}
// Remove the cache file on-disk, then re-set to the current value. If
// our equality comparison is working, we won't rewrite the file
// on-disk since the cached value won't have changed.
if err := os.Remove(cacheFile); err != nil {
t.Fatal(err)
}
UpdateCache(initialCache)
if _, err := os.Stat(cacheFile); !os.IsNotExist(err) {
t.Fatalf("got err=%v; expected to not find cache file", err)
}
// Now, update the cache with something slightly different and verify
// that we did re-write the file on-disk.
updatedCache := &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
99: {
RegionID: 99,
RegionCode: "test",
RegionName: "Testville",
Nodes: []*tailcfg.DERPNode{ /* set below */ },
},
},
}
clonedNode := *initialCache.Regions[99].Nodes[0]
clonedNode.IPv4 = "1.2.3.5"
updatedCache.Regions[99].Nodes = append(updatedCache.Regions[99].Nodes, &clonedNode)
UpdateCache(updatedCache)
if st, err := os.Stat(cacheFile); err != nil {
t.Fatalf("could not stat cache file; err=%v", err)
} else if !st.Mode().IsRegular() || st.Size() == 0 {
t.Fatalf("didn't find non-empty regular file; mode=%v size=%d", st.Mode(), st.Size())
}
}