wgengine/router: make Linux delRoute idempotent, cidrDiff fail late as possible
This makes cidrDiff do as much as possible before failing, and makes a delete of an already-deleted rule be a no-op. We should never do this ourselves, but other things on the system can, and this should help us recover a bit. Also adds the start of root-requiring tests. TODO: hook into wgengine/monitor and notice when routes are changed behind our back, and invalidate our routes map and re-read from kernel (via the ip command) at least on the next reconfig call. Updates tailscale/corp#1338 Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
6a2c6541da
commit
2d96215d97
|
@ -478,7 +478,42 @@ func (r *linuxRouter) delRoute(cidr netaddr.IPPrefix) error {
|
|||
if r.ipRuleAvailable {
|
||||
args = append(args, "table", tailscaleRouteTable)
|
||||
}
|
||||
return r.cmd.run(args...)
|
||||
err := r.cmd.run(args...)
|
||||
if err != nil {
|
||||
ok, err := r.hasRoute(cidr)
|
||||
if err != nil {
|
||||
r.logf("warning: error checking whether %v even exists after error deleting it: %v", err)
|
||||
} else {
|
||||
if !ok {
|
||||
r.logf("warning: tried to delete route %v but it was already gone; ignoring error", cidr)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func dashFam(ip netaddr.IP) string {
|
||||
if ip.Is6() {
|
||||
return "-6"
|
||||
}
|
||||
return "-4"
|
||||
}
|
||||
|
||||
func (r *linuxRouter) hasRoute(cidr netaddr.IPPrefix) (bool, error) {
|
||||
args := []string{
|
||||
"ip", dashFam(cidr.IP), "route", "show",
|
||||
normalizeCIDR(cidr),
|
||||
"dev", r.tunname,
|
||||
}
|
||||
if r.ipRuleAvailable {
|
||||
args = append(args, "table", tailscaleRouteTable)
|
||||
}
|
||||
out, err := r.cmd.output(args...)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(out) > 0, nil
|
||||
}
|
||||
|
||||
// upInterface brings up the tunnel interface.
|
||||
|
@ -987,25 +1022,43 @@ func cidrDiff(kind string, old map[netaddr.IPPrefix]bool, new []netaddr.IPPrefix
|
|||
ret[cidr] = true
|
||||
}
|
||||
|
||||
var delFail []error
|
||||
for cidr := range old {
|
||||
if newMap[cidr] {
|
||||
continue
|
||||
}
|
||||
if err := del(cidr); err != nil {
|
||||
logf("%s del failed: %v", kind, err)
|
||||
return ret, err
|
||||
delFail = append(delFail, err)
|
||||
} else {
|
||||
delete(ret, cidr)
|
||||
}
|
||||
delete(ret, cidr)
|
||||
}
|
||||
if len(delFail) == 1 {
|
||||
return ret, delFail[0]
|
||||
}
|
||||
if len(delFail) > 0 {
|
||||
return ret, fmt.Errorf("%d delete %s failures; first was: %w", len(delFail), kind, delFail[0])
|
||||
}
|
||||
|
||||
var addFail []error
|
||||
for cidr := range newMap {
|
||||
if old[cidr] {
|
||||
continue
|
||||
}
|
||||
if err := add(cidr); err != nil {
|
||||
logf("%s add failed: %v", kind, err)
|
||||
return ret, err
|
||||
addFail = append(addFail, err)
|
||||
} else {
|
||||
ret[cidr] = true
|
||||
}
|
||||
ret[cidr] = true
|
||||
}
|
||||
|
||||
if len(addFail) == 1 {
|
||||
return ret, addFail[0]
|
||||
}
|
||||
if len(addFail) > 0 {
|
||||
return ret, fmt.Errorf("%d add %s failures; first was: %w", len(addFail), kind, addFail[0])
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
|
|
|
@ -5,14 +5,18 @@
|
|||
package router
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/tailscale/wireguard-go/tun"
|
||||
"inet.af/netaddr"
|
||||
)
|
||||
|
||||
|
@ -595,3 +599,69 @@ func (o *fakeOS) output(args ...string) ([]byte, error) {
|
|||
}
|
||||
return []byte(strings.Join(ret, "\n")), nil
|
||||
}
|
||||
|
||||
var tunTestNum int64
|
||||
|
||||
func createTestTUN(t *testing.T) tun.Device {
|
||||
const minimalMTU = 1280
|
||||
tunName := fmt.Sprintf("tuntest%d", atomic.AddInt64(&tunTestNum, 1))
|
||||
tun, err := tun.CreateTUN(tunName, minimalMTU)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateTUN(%q): %v", tunName, err)
|
||||
}
|
||||
return tun
|
||||
}
|
||||
|
||||
func TestDelRouteIdempotent(t *testing.T) {
|
||||
if os.Getuid() != 0 {
|
||||
t.Skip("test requires root")
|
||||
}
|
||||
tun := createTestTUN(t)
|
||||
defer tun.Close()
|
||||
|
||||
var logOutput bytes.Buffer
|
||||
logf := func(format string, args ...interface{}) {
|
||||
fmt.Fprintf(&logOutput, format, args...)
|
||||
if !bytes.HasSuffix(logOutput.Bytes(), []byte("\n")) {
|
||||
logOutput.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
|
||||
r, err := newUserspaceRouter(logf, nil, tun)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lr := r.(*linuxRouter)
|
||||
if err := lr.upInterface(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, s := range []string{
|
||||
"192.0.2.0/24", // RFC 5737
|
||||
"2001:DB8::/32", // RFC 3849
|
||||
} {
|
||||
cidr := netaddr.MustParseIPPrefix(s)
|
||||
if err := lr.addRoute(cidr); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for i := 0; i < 2; i++ {
|
||||
if err := lr.delRoute(cidr); err != nil {
|
||||
t.Fatalf("delRoute(i=%d): %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wantSubs := map[string]int{
|
||||
"warning: tried to delete route 192.0.2.0/24 but it was already gone; ignoring error": 1,
|
||||
"warning: tried to delete route 2001:db8::/32 but it was already gone; ignoring error": 1,
|
||||
}
|
||||
out := logOutput.String()
|
||||
for sub, want := range wantSubs {
|
||||
got := strings.Count(out, sub)
|
||||
if got != want {
|
||||
t.Errorf("log output substring %q occurred %d time; want %d", sub, got, want)
|
||||
}
|
||||
}
|
||||
if t.Failed() {
|
||||
t.Logf("Log output:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue