tka: implement synchronization mechanics

This PR implements the synchronization mechanics for TKA: generating a SyncOffer, processing a SyncOffer to find an intersection,
and computing the set of AUMs that should be transmitted to effect convergence.

This is the final PR implementing core mechanics for TKA.

Signed-off-by: Tom DNetto <tom@tailscale.com>
This commit is contained in:
Tom DNetto 2022-07-18 13:00:32 -07:00 committed by Tom
parent 165c8f898e
commit 393a229de9
2 changed files with 624 additions and 0 deletions

251
tka/sync.go Normal file
View File

@ -0,0 +1,251 @@
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tka
import (
"errors"
"fmt"
"os"
)
const (
// Max iterations searching for any intersection.
maxSyncIter = 2000
// Max iterations searching for a head intersection.
maxSyncHeadIntersectionIter = 400
)
// ErrNoIntersection is returned when a shared AUM could
// not be determined when evaluating a remote sync offer.
var ErrNoIntersection = errors.New("no intersection")
// SyncOffer conveys information about the current head & ancestor AUMs,
// for the purpose of synchronization with some remote end.
//
// Ancestors should contain a subset of the ancestors of the chain.
// The last entry in that slice is the oldest-known AUM in the chain.
type SyncOffer struct {
Head AUMHash
Ancestors []AUMHash
}
const (
// The starting number of AUMs to skip when listing
// ancestors in a SyncOffer.
ancestorsSkipStart = 4
// How many bits to advance the skip count when listing
// ancestors in a SyncOffer.
//
// 2 bits, so (4<<2), so after skipping 4 it skips 16.
ancestorsSkipShift = 2
)
func (a *Authority) syncOffer() (SyncOffer, error) {
oldest := a.oldestAncestor.Hash()
out := SyncOffer{
Head: a.Head(),
Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
}
// We send some subset of our ancestors to help the remote
// find a more-recent 'head intersection'.
// The number of AUMs between each ancestor entry gets
// exponentially larger.
var (
skipAmount uint64 = ancestorsSkipStart
curs AUMHash = a.Head()
)
for i := uint64(0); i < maxSyncHeadIntersectionIter; i++ {
if i > 0 && (i%skipAmount) == 0 {
out.Ancestors = append(out.Ancestors, curs)
skipAmount = skipAmount << ancestorsSkipShift
}
parent, err := a.storage.AUM(curs)
if err != nil {
if err != os.ErrNotExist {
return SyncOffer{}, err
}
break
}
// We add the oldest later on, so don't duplicate.
if parent.Hash() == oldest {
break
}
copy(curs[:], parent.PrevAUMHash)
}
out.Ancestors = append(out.Ancestors, oldest)
return out, nil
}
// SyncOffer returns an abbreviated description of the current AUM
// chain, which can be used to synchronize with another (untrusted)
// Authority instance.
//
// The returned SyncOffer structure should be transmitted to the remote
// Authority, which should call MissingAUMs() using it to determine
// AUMs which need to be transmitted. This list of AUMs from the remote
// can then be applied locally with Inform().
//
// This SyncOffer + AUM exchange should be performed by both ends,
// because its possible that either end has AUMs that the other needs
// to find out about.
func (a *Authority) SyncOffer() (SyncOffer, error) {
return a.syncOffer()
}
// intersection describes how to synchronize AUMs with a remote
// authority.
type intersection struct {
// if true, no exchange of AUMs is needed.
upToDate bool
// headIntersection is the latest common AUM on the remote. In other
// words, we need to send all AUMs since this one.
headIntersection *AUMHash
// tailIntersection is the oldest common AUM on the remote. In other
// words, we diverge with the remote after this AUM, so we both need
// to transmit our AUM chain starting here.
tailIntersection *AUMHash
}
// computeSyncIntersection determines the common AUMs between a local and
// remote SyncOffer. This intersection can be used to synchronize both
// sides.
func computeSyncIntersection(authority *Authority, localOffer, remoteOffer SyncOffer) (*intersection, error) {
// Simple case: up to date.
if remoteOffer.Head == localOffer.Head {
return &intersection{upToDate: true, headIntersection: &localOffer.Head}, nil
}
// Case: 'head intersection'
// If we have the remote's head, its more likely than not that
// we have updates that build on that head. To confirm this,
// we iterate backwards through our chain to see if the given
// head is an ancestor of our current chain.
//
// In other words:
// <Us> A -> B -> C
// <Them> A -> B
// ∴ their head intersects with our chain, we need to send C
var hasRemoteHead bool
_, err := authority.storage.AUM(remoteOffer.Head)
if err != nil {
if err != os.ErrNotExist {
return nil, err
}
} else {
hasRemoteHead = true
}
if hasRemoteHead {
curs := localOffer.Head
for i := 0; i < maxSyncHeadIntersectionIter; i++ {
parent, err := authority.storage.AUM(curs)
if err != nil {
if err != os.ErrNotExist {
return nil, err
}
break
}
if parent.Hash() == remoteOffer.Head {
h := parent.Hash()
return &intersection{headIntersection: &h}, nil
}
copy(curs[:], parent.PrevAUMHash)
}
}
// Case: 'tail intersection'
// So we don't have a clue what the remote's head is, but
// if one of the ancestors they gave us is part of our chain,
// then theres an intersection, which is a starting point for
// the remote to send us AUMs from.
//
// We iterate the list of ancestors in order because the remote
// ordered them such that the newer ones are earlier, so with
// a bit of luck we can use an earlier one and hence do less work /
// transmit fewer AUMs.
for _, a := range remoteOffer.Ancestors {
state, err := computeStateAt(authority.storage, maxSyncIter, a)
if err != nil {
if err != os.ErrNotExist {
return nil, fmt.Errorf("computeStateAt: %v", err)
}
continue
}
end, _, err := fastForward(authority.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
return curs.Hash() == localOffer.Head
})
if err != nil {
return nil, err
}
// fastForward can terminate before the done condition if there are
// no more children left, so we check again before considering this
// an intersection.
if end.Hash() == localOffer.Head {
return &intersection{tailIntersection: &a}, nil
}
}
return nil, ErrNoIntersection
}
// MissingAUMs returns AUMs a remote may be missing based on the
// remotes' SyncOffer.
func (a *Authority) MissingAUMs(remoteOffer SyncOffer) ([]AUM, error) {
localOffer, err := a.syncOffer()
if err != nil {
return nil, fmt.Errorf("local syncOffer: %v", err)
}
intersection, err := computeSyncIntersection(a, localOffer, remoteOffer)
if err != nil {
return nil, fmt.Errorf("intersection: %v", err)
}
if intersection.upToDate {
return nil, nil
}
out := make([]AUM, 0, 12) // 12 chosen arbitrarily.
if intersection.headIntersection != nil {
state, err := computeStateAt(a.storage, maxSyncIter, *intersection.headIntersection)
if err != nil {
return nil, err
}
_, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
if curs.Hash() != *intersection.headIntersection {
out = append(out, curs)
}
return false
})
return out, err
}
if intersection.tailIntersection != nil {
state, err := computeStateAt(a.storage, maxSyncIter, *intersection.tailIntersection)
if err != nil {
return nil, err
}
_, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
if curs.Hash() != *intersection.tailIntersection {
out = append(out, curs)
}
return false
})
return out, err
}
panic("unreachable")
}

373
tka/sync_test.go Normal file
View File

@ -0,0 +1,373 @@
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tka
import (
"bytes"
"strconv"
"testing"
"github.com/google/go-cmp/cmp"
)
func TestSyncOffer(t *testing.T) {
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
`)
a, err := Open(c.Chonk())
if err != nil {
t.Fatal(err)
}
got, err := a.SyncOffer()
if err != nil {
t.Fatal(err)
}
// A SyncOffer includes a selection of AUMs going backwards in the tree,
// progressively skipping more and more each iteration.
want := SyncOffer{
Head: c.AUMHashes["A25"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
c.AUMHashes["A1"],
},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
}
}
func TestComputeSyncIntersection_FastForward(t *testing.T) {
// Node 1 has: A1 -> A2
// Node 2 has: A1 -> A2 -> A3 -> A4
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4
`)
a1H, a2H := c.AUMHashes["A1"], c.AUMHashes["A2"]
chonk1 := c.ChonkWith("A1", "A2")
n1, err := Open(chonk1)
if err != nil {
t.Fatal(err)
}
offer1, err := n1.SyncOffer()
if err != nil {
t.Fatal(err)
}
chonk2 := c.Chonk() // All AUMs
n2, err := Open(chonk2)
if err != nil {
t.Fatal(err)
}
offer2, err := n2.SyncOffer()
if err != nil {
t.Fatal(err)
}
// Node 1 only knows about the first two nodes, so the head of n2 is
// alien to it.
t.Run("n1", func(t *testing.T) {
got, err := computeSyncIntersection(n1, offer1, offer2)
if err != nil {
t.Fatalf("computeSyncIntersection() failed: %v", err)
}
want := &intersection{
tailIntersection: &a1H,
}
if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
t.Errorf("intersection diff (-want, +got):\n%s", diff)
}
})
// Node 2 knows about the full chain, so it can see that the head of n1
// intersects with a subset of its chain (a Head Intersection).
t.Run("n2", func(t *testing.T) {
got, err := computeSyncIntersection(n2, offer2, offer1)
if err != nil {
t.Fatalf("computeSyncIntersection() failed: %v", err)
}
want := &intersection{
headIntersection: &a2H,
}
if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
t.Errorf("intersection diff (-want, +got):\n%s", diff)
}
})
}
func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
// The number of nodes in the chain is longer than ancestorSkipStart,
// so that during sync both nodes are able to find a common ancestor
// which was later than A1.
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
| -> F1
// Make F1 different to A9.
// hashSeed is chosen such that the hash is higher than A9.
F1.hashSeed = 7
`)
// Node 1 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> F1
// Node 2 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
f1H, a9H := c.AUMHashes["F1"], c.AUMHashes["A9"]
if bytes.Compare(f1H[:], a9H[:]) < 0 {
t.Fatal("failed assert: h(a9) > h(f1H)\nTweak hashSeed till this passes")
}
n1, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "F1"))
if err != nil {
t.Fatal(err)
}
offer1, err := n1.SyncOffer()
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(SyncOffer{
Head: c.AUMHashes["F1"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
c.AUMHashes["A1"],
},
}, offer1); diff != "" {
t.Errorf("offer1 diff (-want, +got):\n%s", diff)
}
n2, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10"))
if err != nil {
t.Fatal(err)
}
offer2, err := n2.SyncOffer()
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(SyncOffer{
Head: c.AUMHashes["A10"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
c.AUMHashes["A1"],
},
}, offer2); diff != "" {
t.Errorf("offer2 diff (-want, +got):\n%s", diff)
}
// Node 1 only knows about the first eight nodes, so the head of n2 is
// alien to it.
t.Run("n1", func(t *testing.T) {
// n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
got, err := computeSyncIntersection(n1, offer1, offer2)
if err != nil {
t.Fatalf("computeSyncIntersection() failed: %v", err)
}
want := &intersection{
tailIntersection: &wantIntersection,
}
if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
t.Errorf("intersection diff (-want, +got):\n%s", diff)
}
})
// Node 2 knows about the full chain but doesn't recognize the head.
t.Run("n2", func(t *testing.T) {
// n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
got, err := computeSyncIntersection(n2, offer2, offer1)
if err != nil {
t.Fatalf("computeSyncIntersection() failed: %v", err)
}
want := &intersection{
tailIntersection: &wantIntersection,
}
if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
t.Errorf("intersection diff (-want, +got):\n%s", diff)
}
})
}
func TestMissingAUMs_FastForward(t *testing.T) {
// Node 1 has: A1 -> A2
// Node 2 has: A1 -> A2 -> A3 -> A4
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4
A1.hashSeed = 1
A2.hashSeed = 2
A3.hashSeed = 3
A4.hashSeed = 4
`)
chonk1 := c.ChonkWith("A1", "A2")
n1, err := Open(chonk1)
if err != nil {
t.Fatal(err)
}
offer1, err := n1.SyncOffer()
if err != nil {
t.Fatal(err)
}
chonk2 := c.Chonk() // All AUMs
n2, err := Open(chonk2)
if err != nil {
t.Fatal(err)
}
offer2, err := n2.SyncOffer()
if err != nil {
t.Fatal(err)
}
// Node 1 only knows about the first two nodes, so the head of n2 is
// alien to it. As such, it should send history from the newest ancestor,
// A1 (if the chain was longer there would be one in the middle).
t.Run("n1", func(t *testing.T) {
got, err := n1.MissingAUMs(offer2)
if err != nil {
t.Fatalf("MissingAUMs() failed: %v", err)
}
// Both sides have A1, so the only AUM that n2 might not have is
// A2.
want := []AUM{c.AUMs["A2"]}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
}
})
// Node 2 knows about the full chain, so it can see that the head of n1
// intersects with a subset of its chain (a Head Intersection).
t.Run("n2", func(t *testing.T) {
got, err := n2.MissingAUMs(offer1)
if err != nil {
t.Fatalf("MissingAUMs() failed: %v", err)
}
want := []AUM{
c.AUMs["A3"],
c.AUMs["A4"],
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
}
})
}
func TestMissingAUMs_Fork(t *testing.T) {
// Node 1 has: A1 -> A2 -> A3 -> F1
// Node 2 has: A1 -> A2 -> A3 -> A4
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4
| -> F1
A1.hashSeed = 1
A2.hashSeed = 2
A3.hashSeed = 3
A4.hashSeed = 4
`)
chonk1 := c.ChonkWith("A1", "A2", "A3", "F1")
n1, err := Open(chonk1)
if err != nil {
t.Fatal(err)
}
offer1, err := n1.SyncOffer()
if err != nil {
t.Fatal(err)
}
chonk2 := c.ChonkWith("A1", "A2", "A3", "A4")
n2, err := Open(chonk2)
if err != nil {
t.Fatal(err)
}
offer2, err := n2.SyncOffer()
if err != nil {
t.Fatal(err)
}
t.Run("n1", func(t *testing.T) {
got, err := n1.MissingAUMs(offer2)
if err != nil {
t.Fatalf("MissingAUMs() failed: %v", err)
}
// Both sides have A1, so n1 will send everything it knows from
// there to head.
want := []AUM{
c.AUMs["A2"],
c.AUMs["A3"],
c.AUMs["F1"],
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
}
})
t.Run("n2", func(t *testing.T) {
got, err := n2.MissingAUMs(offer1)
if err != nil {
t.Fatalf("MissingAUMs() failed: %v", err)
}
// Both sides have A1, so n2 will send everything it knows from
// there to head.
want := []AUM{
c.AUMs["A2"],
c.AUMs["A3"],
c.AUMs["A4"],
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
}
})
}
func TestSyncSimpleE2E(t *testing.T) {
pub, priv := testingKey25519(t, 1)
key := Key{Kind: Key25519, Public: pub, Votes: 2}
c := newTestchain(t, `
G1 -> L1 -> L2 -> L3
G1.template = genesis
`,
optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
Keys: []Key{key},
DisablementSecrets: [][]byte{disablementKDF([]byte{1, 2, 3})},
}}),
optKey("key", key, priv),
optSignAllUsing("key"))
node, err := Bootstrap(&Mem{}, c.AUMs["G1"])
if err != nil {
t.Fatalf("node Bootstrap() failed: %v", err)
}
control, err := Open(c.Chonk())
if err != nil {
t.Fatalf("control Open() failed: %v", err)
}
// Control knows the full chain, node only knows the genesis. Lets see
// if they can sync.
nodeOffer, err := node.SyncOffer()
if err != nil {
t.Fatal(err)
}
controlAUMs, err := control.MissingAUMs(nodeOffer)
if err != nil {
t.Fatalf("control.MissingAUMs(%v) failed: %v", nodeOffer, err)
}
if err := node.Inform(controlAUMs); err != nil {
t.Fatalf("node.Inform(%v) failed: %v", controlAUMs, err)
}
if cHash, nHash := control.Head(), node.Head(); cHash != nHash {
t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
}
}