util/deephash: tighten up SelfHasher API (#11012)

Providing a hash.Block512 is an implementation detail of how deephash
works today, but providing an opaque type with mostly equivalent API
(i.e., HashUint8, HashBytes, etc. methods) is still sensible.
Thus, define a public Hasher type that exposes exactly the API
that an implementation of SelfHasher would want to call.
This gives us freedom to change the hashing algorithm of deephash
at some point in the future.

Also, this type is likely going to be called by types that are
going to memoize their own hash results, we additionally add
a HashSum method to simplify this use case.

Add documentation to SelfHasher on how a type might implement it.

Updates: corp#16409

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2024-02-01 17:07:41 -08:00 committed by GitHub
parent 84f8311bcd
commit 60657ac83f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 85 additions and 36 deletions

View File

@ -104,6 +104,70 @@ import (
// by looking up the hash in a magical map that returns the set of entries
// for that given hash.
// SelfHasher is implemented by types that can compute their own hash
// by writing values through the provided [Hasher] parameter.
// Implementations must not leak the provided [Hasher].
//
// If the implementation of SelfHasher recursively calls [deephash.Hash],
// then infinite recursion is quite likely to occur.
// To avoid this, use a type definition to drop methods before calling [deephash.Hash]:
//
// func (v *MyType) Hash(h deephash.Hasher) {
// v.hashMu.Lock()
// defer v.hashMu.Unlock()
// if v.dirtyHash {
// type MyTypeWithoutMethods MyType // type define MyType to drop Hash method
// v.dirtyHash = false // clear out dirty bit to avoid hashing over it
// v.hashSum = deephash.Sum{} // clear out hashSum to avoid hashing over it
// v.hashSum = deephash.Hash((*MyTypeWithoutMethods)(v))
// }
// h.HashSum(v.hashSum)
// }
//
// In the above example, we acquire a lock since it is possible that deephash
// is called in a concurrent manner, which implies that MyType.Hash may also
// be called in a concurrent manner. Whether this lock is necessary is
// application-dependent and left as an exercise to the reader.
// Also, the example assumes that dirtyHash is set elsewhere by application
// logic whenever a mutation is made to MyType that would alter the hash.
type SelfHasher interface {
Hash(Hasher)
}
// Hasher is a value passed to [SelfHasher.Hash] that allow implementations
// to hash themselves in a structured manner.
type Hasher struct{ h *hashx.Block512 }
// HashBytes hashes a sequence of bytes b.
// The length of b is not explicitly hashed.
func (h Hasher) HashBytes(b []byte) { h.h.HashBytes(b) }
// HashString hashes the string data of s
// The length of s is not explicitly hashed.
func (h Hasher) HashString(s string) { h.h.HashString(s) }
// HashUint8 hashes a uint8.
func (h Hasher) HashUint8(n uint8) { h.h.HashUint8(n) }
// HashUint16 hashes a uint16.
func (h Hasher) HashUint16(n uint16) { h.h.HashUint16(n) }
// HashUint32 hashes a uint32.
func (h Hasher) HashUint32(n uint32) { h.h.HashUint32(n) }
// HashUint64 hashes a uint64.
func (h Hasher) HashUint64(n uint64) { h.h.HashUint64(n) }
// HashSum hashes a [Sum].
func (h Hasher) HashSum(s Sum) {
// NOTE: Avoid calling h.HashBytes since it escapes b,
// which would force s to be heap allocated.
h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[0:8]))
h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[8:16]))
h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[16:24]))
h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[24:32]))
}
// hasher is reusable state for hashing a value.
// Get one via hasherPool.
type hasher struct {
@ -339,7 +403,7 @@ func makeTypeHasher(t reflect.Type) typeHasherFunc {
if t.Kind() != reflect.Pointer && t.Kind() != reflect.Interface {
// A method can be implemented on either the value receiver or pointer receiver.
if t.Implements(selfHasherType) || reflect.PointerTo(t).Implements(selfHasherType) {
return makeSelfHasherImpl(t)
return makeSelfHasher(t)
}
}
@ -401,10 +465,9 @@ func hashAddr(h *hasher, p pointer) {
}
}
func makeSelfHasherImpl(t reflect.Type) typeHasherFunc {
func makeSelfHasher(t reflect.Type) typeHasherFunc {
return func(h *hasher, p pointer) {
e := p.asValue(t)
e.Interface().(SelfHasher).Hash(&h.Block512)
p.asValue(t).Interface().(SelfHasher).Hash(Hasher{&h.Block512})
}
}

View File

@ -42,11 +42,19 @@ func (p appendBytes) AppendTo(b []byte) []byte {
return append(b, p...)
}
type implsSelfHasherValueRecv struct {
type selfHasherValueRecv struct {
emit uint64
}
func (s implsSelfHasherValueRecv) Hash(h *hashx.Block512) {
func (s selfHasherValueRecv) Hash(h *hashx.Block512) {
h.HashUint64(s.emit)
}
type selfHasherPointerRecv struct {
emit uint64
}
func (s *selfHasherPointerRecv) Hash(h *hashx.Block512) {
h.HashUint64(s.emit)
}
@ -178,12 +186,12 @@ func TestHash(t *testing.T) {
b[0] = 1
return b
}()))}, wantEq: false},
{in: tuple{&implsSelfHasher{}, &implsSelfHasher{}}, wantEq: true},
{in: tuple{(*implsSelfHasher)(nil), (*implsSelfHasher)(nil)}, wantEq: true},
{in: tuple{(*implsSelfHasher)(nil), &implsSelfHasher{}}, wantEq: false},
{in: tuple{&implsSelfHasher{emit: 1}, &implsSelfHasher{emit: 2}}, wantEq: false},
{in: tuple{implsSelfHasherValueRecv{emit: 1}, implsSelfHasherValueRecv{emit: 2}}, wantEq: false},
{in: tuple{implsSelfHasherValueRecv{emit: 2}, implsSelfHasherValueRecv{emit: 2}}, wantEq: true},
{in: tuple{&selfHasherPointerRecv{}, &selfHasherPointerRecv{}}, wantEq: true},
{in: tuple{(*selfHasherPointerRecv)(nil), (*selfHasherPointerRecv)(nil)}, wantEq: true},
{in: tuple{(*selfHasherPointerRecv)(nil), &selfHasherPointerRecv{}}, wantEq: false},
{in: tuple{&selfHasherPointerRecv{emit: 1}, &selfHasherPointerRecv{emit: 2}}, wantEq: false},
{in: tuple{selfHasherValueRecv{emit: 1}, selfHasherValueRecv{emit: 2}}, wantEq: false},
{in: tuple{selfHasherValueRecv{emit: 2}, selfHasherValueRecv{emit: 2}}, wantEq: true},
}
for _, tt := range tests {

View File

@ -7,21 +7,8 @@ import (
"net/netip"
"reflect"
"time"
"tailscale.com/util/hashx"
)
// SelfHasher is the interface implemented by types that can compute their own hash
// by writing values through the given parameter.
//
// Implementations of Hash MUST NOT call `Reset` or `Sum` on the provided argument.
//
// This interface should not be considered stable and is likely to change in the
// future.
type SelfHasher interface {
Hash(*hashx.Block512)
}
var (
timeTimeType = reflect.TypeOf((*time.Time)(nil)).Elem()
netipAddrType = reflect.TypeOf((*netip.Addr)(nil)).Elem()

View File

@ -12,17 +12,8 @@ import (
"tailscale.com/tailcfg"
"tailscale.com/types/structs"
"tailscale.com/util/hashx"
)
type implsSelfHasher struct {
emit uint64
}
func (s *implsSelfHasher) Hash(h *hashx.Block512) {
h.HashUint64(s.emit)
}
func TestTypeIsMemHashable(t *testing.T) {
tests := []struct {
val any
@ -76,7 +67,7 @@ func TestTypeIsMemHashable(t *testing.T) {
false},
{[0]chan bool{}, true},
{struct{ f [0]func() }{}, true},
{&implsSelfHasher{}, false},
{&selfHasherPointerRecv{}, false},
}
for _, tt := range tests {
got := typeIsMemHashable(reflect.TypeOf(tt.val))
@ -112,7 +103,7 @@ func TestTypeIsRecursive(t *testing.T) {
{val: unsafe.Pointer(nil), want: false},
{val: make(RecursiveChan), want: true},
{val: make(chan int), want: false},
{val: (*implsSelfHasher)(nil), want: false},
{val: (*selfHasherPointerRecv)(nil), want: false},
}
for _, tt := range tests {
got := typeIsRecursive(reflect.TypeOf(tt.val))