net/dnsfallback: cache most recent DERP map on disk (#5545)
This is especially helpful as we launch newer DERPs over time, and older clients have progressively out-of-date static DERP maps baked in. After this, as long as the client has successfully connected once, it'll cache the most recent DERP map it knows about. Resolves an in-code comment from @bradfitz. Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
parent
060ecb010f
commit
b8596f2a2f
|
@ -34,6 +34,7 @@ import (
|
|||
"tailscale.com/ipn/ipnstate"
|
||||
"tailscale.com/ipn/policy"
|
||||
"tailscale.com/net/dns"
|
||||
"tailscale.com/net/dnsfallback"
|
||||
"tailscale.com/net/interfaces"
|
||||
"tailscale.com/net/netutil"
|
||||
"tailscale.com/net/tsaddr"
|
||||
|
@ -731,6 +732,9 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) {
|
|||
b.e.SetNetworkMap(st.NetMap)
|
||||
b.e.SetDERPMap(st.NetMap.DERPMap)
|
||||
|
||||
// Update our cached DERP map
|
||||
dnsfallback.UpdateCache(st.NetMap.DERPMap)
|
||||
|
||||
b.send(ipn.Notify{NetMap: st.NetMap})
|
||||
}
|
||||
if st.URL != "" {
|
||||
|
|
|
@ -37,6 +37,7 @@ import (
|
|||
"tailscale.com/ipn/ipnlocal"
|
||||
"tailscale.com/ipn/localapi"
|
||||
"tailscale.com/logtail/backoff"
|
||||
"tailscale.com/net/dnsfallback"
|
||||
"tailscale.com/net/netstat"
|
||||
"tailscale.com/net/netutil"
|
||||
"tailscale.com/net/tsdial"
|
||||
|
@ -786,6 +787,8 @@ func New(logf logger.Logf, logid string, store ipn.StateStore, eng wgengine.Engi
|
|||
b.SetTailnetKeyAuthority(authority, storage)
|
||||
logf("tka initialized at head %x", authority.Head())
|
||||
}
|
||||
|
||||
dnsfallback.SetCachePath(filepath.Join(root, "derpmap.cached.json"))
|
||||
} else {
|
||||
logf("network-lock unavailable; no state directory")
|
||||
}
|
||||
|
|
|
@ -20,8 +20,12 @@ import (
|
|||
"net/http"
|
||||
"net/netip"
|
||||
"net/url"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"tailscale.com/atomicfile"
|
||||
"tailscale.com/net/netns"
|
||||
"tailscale.com/net/tlsdial"
|
||||
"tailscale.com/net/tshttpproxy"
|
||||
|
@ -39,6 +43,7 @@ func Lookup(ctx context.Context, host string) ([]netip.Addr, error) {
|
|||
}
|
||||
|
||||
dm := getDERPMap()
|
||||
|
||||
var cands4, cands6 []nameIP
|
||||
for _, dr := range dm.Regions {
|
||||
for _, n := range dr.Nodes {
|
||||
|
@ -128,12 +133,45 @@ type dnsMap map[string][]netip.Addr
|
|||
// getDERPMap returns some DERP map. The DERP servers also run a fallback
|
||||
// DNS server.
|
||||
func getDERPMap() *tailcfg.DERPMap {
|
||||
// TODO(bradfitz): try to read the last known DERP map from disk,
|
||||
// at say /var/lib/tailscale/derpmap.txt and write it when it changes,
|
||||
// and read it here.
|
||||
// But ultimately the fallback will be to use a copy baked into the binary,
|
||||
// which is this part:
|
||||
dm := getStaticDERPMap()
|
||||
|
||||
// Merge in any DERP servers from the cached map that aren't in the
|
||||
// static map; this ensures that we're getting new region(s) while not
|
||||
// overriding the built-in fallbacks if things go horribly wrong and we
|
||||
// get a bad DERP map.
|
||||
//
|
||||
// TODO(andrew): should we expect OmitDefaultRegions here? We're not
|
||||
// forwarding traffic, just resolving DNS, so maybe we can ignore that
|
||||
// value anyway?
|
||||
cached := cachedDERPMap.Load()
|
||||
if cached == nil {
|
||||
return dm
|
||||
}
|
||||
|
||||
for id, region := range cached.Regions {
|
||||
dr, ok := dm.Regions[id]
|
||||
if !ok {
|
||||
dm.Regions[id] = region
|
||||
continue
|
||||
}
|
||||
|
||||
// Add any nodes that we don't already have.
|
||||
seen := make(map[string]bool)
|
||||
for _, n := range dr.Nodes {
|
||||
seen[n.HostName] = true
|
||||
}
|
||||
for _, n := range region.Nodes {
|
||||
if !seen[n.HostName] {
|
||||
dr.Nodes = append(dr.Nodes, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dm
|
||||
}
|
||||
|
||||
// getStaticDERPMap returns the DERP map that was compiled into this binary.
|
||||
func getStaticDERPMap() *tailcfg.DERPMap {
|
||||
dm := new(tailcfg.DERPMap)
|
||||
if err := json.Unmarshal(staticDERPMapJSON, dm); err != nil {
|
||||
panic(err)
|
||||
|
@ -143,3 +181,67 @@ func getDERPMap() *tailcfg.DERPMap {
|
|||
|
||||
//go:embed dns-fallback-servers.json
|
||||
var staticDERPMapJSON []byte
|
||||
|
||||
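// cachedDERPMap is the most recent DERP map we know about: either the one
// loaded from our on-disk cache by SetCachePath, or the last one passed to
// UpdateCache. It is nil if neither has happened yet.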
|
||||
var cachedDERPMap atomic.Pointer[tailcfg.DERPMap]
|
||||
|
||||
// cachePath is the path to the DERP map cache file, set by SetCachePath via
|
||||
// ipnserver.New() if we have a state directory.
|
||||
var cachePath string
|
||||
|
||||
// UpdateCache stores the DERP map cache back to disk.
|
||||
//
|
||||
// The caller must not mutate 'c' after calling this function.
|
||||
func UpdateCache(c *tailcfg.DERPMap) {
|
||||
// Don't do anything if nothing changed.
|
||||
curr := cachedDERPMap.Load()
|
||||
if reflect.DeepEqual(curr, c) {
|
||||
return
|
||||
}
|
||||
|
||||
d, err := json.Marshal(c)
|
||||
if err != nil {
|
||||
log.Printf("[v1] dnsfallback: UpdateCache error marshaling: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Only store after we're confident this is at least valid JSON
|
||||
cachedDERPMap.Store(c)
|
||||
|
||||
// Don't try writing if we don't have a cache path set; this can happen
|
||||
// when we don't have a state path (e.g. /var/lib/tailscale) configured.
|
||||
if cachePath != "" {
|
||||
err = atomicfile.WriteFile(cachePath, d, 0600)
|
||||
if err != nil {
|
||||
log.Printf("[v1] dnsfallback: UpdateCache error writing: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
log.Printf("[v2] dnsfallback: UpdateCache succeeded")
|
||||
}
|
||||
|
||||
// SetCachePath sets the path to the on-disk DERP map cache that we store and
|
||||
// update. Additionally, if a file at this path exists, we load it and merge it
|
||||
// with the DERP map baked into the binary.
|
||||
//
|
||||
// This function should be called before any calls to UpdateCache, as it is not
|
||||
// concurrency-safe.
|
||||
func SetCachePath(path string) {
|
||||
cachePath = path
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
log.Printf("[v1] dnsfallback: SetCachePath error reading %q: %v", path, err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
dm := new(tailcfg.DERPMap)
|
||||
if err := json.NewDecoder(f).Decode(dm); err != nil {
|
||||
log.Printf("[v1] dnsfallback: SetCachePath error decoding %q: %v", path, err)
|
||||
return
|
||||
}
|
||||
|
||||
cachedDERPMap.Store(dm)
|
||||
log.Printf("[v2] dnsfallback: SetCachePath loaded cached DERP map")
|
||||
}
|
||||
|
|
|
@ -4,7 +4,15 @@
|
|||
|
||||
package dnsfallback
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"tailscale.com/tailcfg"
|
||||
)
|
||||
|
||||
func TestGetDERPMap(t *testing.T) {
|
||||
dm := getDERPMap()
|
||||
|
@ -15,3 +23,151 @@ func TestGetDERPMap(t *testing.T) {
|
|||
t.Fatal("no regions")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCache(t *testing.T) {
|
||||
cacheFile := filepath.Join(t.TempDir(), "cache.json")
|
||||
|
||||
// Write initial cache value
|
||||
initialCache := &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
99: {
|
||||
RegionID: 99,
|
||||
RegionCode: "test",
|
||||
RegionName: "Testville",
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "99a",
|
||||
RegionID: 99,
|
||||
HostName: "derp99a.tailscale.com",
|
||||
IPv4: "1.2.3.4",
|
||||
}},
|
||||
},
|
||||
|
||||
// Intentionally attempt to "overwrite" something
|
||||
1: {
|
||||
RegionID: 1,
|
||||
RegionCode: "r1",
|
||||
RegionName: "r1",
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "1c",
|
||||
RegionID: 1,
|
||||
HostName: "derp1c.tailscale.com",
|
||||
IPv4: "127.0.0.1",
|
||||
IPv6: "::1",
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
d, err := json.Marshal(initialCache)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(cacheFile, d, 0666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Clear any existing cached DERP map(s)
|
||||
cachedDERPMap.Store(nil)
|
||||
|
||||
// Load the cache
|
||||
SetCachePath(cacheFile)
|
||||
if cm := cachedDERPMap.Load(); !reflect.DeepEqual(initialCache, cm) {
|
||||
t.Fatalf("cached map was %+v; want %+v", cm, initialCache)
|
||||
}
|
||||
|
||||
// Verify that our DERP map is merged with the cache.
|
||||
dm := getDERPMap()
|
||||
region, ok := dm.Regions[99]
|
||||
if !ok {
|
||||
t.Fatal("expected region 99")
|
||||
}
|
||||
if !reflect.DeepEqual(region, initialCache.Regions[99]) {
|
||||
t.Fatalf("region 99: got %+v; want %+v", region, initialCache.Regions[99])
|
||||
}
|
||||
|
||||
// Verify that our cache can't override a statically-baked-in DERP server.
|
||||
n0 := dm.Regions[1].Nodes[0]
|
||||
if n0.IPv4 == "127.0.0.1" || n0.IPv6 == "::1" {
|
||||
t.Errorf("got %+v; expected no overwrite for node", n0)
|
||||
}
|
||||
|
||||
// Also, make sure that the static DERP map still has the same first
|
||||
// node as when this test was last written/updated; this ensures that
|
||||
// we don't accidentally start allowing overwrites due to some of the
|
||||
// test's assumptions changing out from underneath us as we update the
|
||||
// JSON file of fallback servers.
|
||||
if getStaticDERPMap().Regions[1].Nodes[0].HostName != "derp1c.tailscale.com" {
|
||||
t.Errorf("DERP server has a different name; please update this test")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheUnchanged(t *testing.T) {
|
||||
cacheFile := filepath.Join(t.TempDir(), "cache.json")
|
||||
|
||||
// Write initial cache value
|
||||
initialCache := &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
99: {
|
||||
RegionID: 99,
|
||||
RegionCode: "test",
|
||||
RegionName: "Testville",
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "99a",
|
||||
RegionID: 99,
|
||||
HostName: "derp99a.tailscale.com",
|
||||
IPv4: "1.2.3.4",
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
d, err := json.Marshal(initialCache)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(cacheFile, d, 0666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Clear any existing cached DERP map(s)
|
||||
cachedDERPMap.Store(nil)
|
||||
|
||||
// Load the cache
|
||||
SetCachePath(cacheFile)
|
||||
if cm := cachedDERPMap.Load(); !reflect.DeepEqual(initialCache, cm) {
|
||||
t.Fatalf("cached map was %+v; want %+v", cm, initialCache)
|
||||
}
|
||||
|
||||
// Remove the cache file on-disk, then re-set to the current value. If
|
||||
// our equality comparison is working, we won't rewrite the file
|
||||
// on-disk since the cached value won't have changed.
|
||||
if err := os.Remove(cacheFile); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
UpdateCache(initialCache)
|
||||
if _, err := os.Stat(cacheFile); !os.IsNotExist(err) {
|
||||
t.Fatalf("got err=%v; expected to not find cache file", err)
|
||||
}
|
||||
|
||||
// Now, update the cache with something slightly different and verify
|
||||
// that we did re-write the file on-disk.
|
||||
updatedCache := &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
99: {
|
||||
RegionID: 99,
|
||||
RegionCode: "test",
|
||||
RegionName: "Testville",
|
||||
Nodes: []*tailcfg.DERPNode{ /* set below */ },
|
||||
},
|
||||
},
|
||||
}
|
||||
clonedNode := *initialCache.Regions[99].Nodes[0]
|
||||
clonedNode.IPv4 = "1.2.3.5"
|
||||
updatedCache.Regions[99].Nodes = append(updatedCache.Regions[99].Nodes, &clonedNode)
|
||||
|
||||
UpdateCache(updatedCache)
|
||||
if st, err := os.Stat(cacheFile); err != nil {
|
||||
t.Fatalf("could not stat cache file; err=%v", err)
|
||||
} else if !st.Mode().IsRegular() || st.Size() == 0 {
|
||||
t.Fatalf("didn't find non-empty regular file; mode=%v size=%d", st.Mode(), st.Size())
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue