352 lines
8.5 KiB
Go
352 lines
8.5 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package deephash hashes a Go value recursively, in a predictable order,
|
|
// without looping. The hash is only valid within the lifetime of a program.
|
|
// Users should not store the hash on disk or send it over the network.
|
|
// The hash is sufficiently strong and unique such that
|
|
// Hash(x) == Hash(y) is an appropriate replacement for x == y.
|
|
//
|
|
// This package, like most of the tailscale.com Go module, should be
|
|
// considered Tailscale-internal; we make no API promises.
|
|
package deephash
|
|
|
|
import (
|
|
"bufio"
|
|
"crypto/sha256"
|
|
"encoding/binary"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"hash"
|
|
"math"
|
|
"reflect"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
const scratchSize = 128
|
|
|
|
// hasher is reusable state for hashing a value.
|
|
// Get one via hasherPool.
|
|
type hasher struct {
|
|
h hash.Hash
|
|
bw *bufio.Writer
|
|
scratch [scratchSize]byte
|
|
visited map[uintptr]bool
|
|
}
|
|
|
|
// newHasher initializes a new hasher, for use by hasherPool.
|
|
func newHasher() *hasher {
|
|
h := &hasher{
|
|
h: sha256.New(),
|
|
visited: map[uintptr]bool{},
|
|
}
|
|
h.bw = bufio.NewWriterSize(h.h, h.h.BlockSize())
|
|
return h
|
|
}
|
|
|
|
// setBufioWriter switches the bufio writer to w after flushing
|
|
// any output to the old one. It then also returns the old one, so
|
|
// the caller can switch back to it.
|
|
func (h *hasher) setBufioWriter(w *bufio.Writer) (old *bufio.Writer) {
|
|
old = h.bw
|
|
old.Flush()
|
|
h.bw = w
|
|
return old
|
|
}
|
|
|
|
// Sum is an opaque checksum type that is comparable.
|
|
type Sum struct {
|
|
sum [sha256.Size]byte
|
|
}
|
|
|
|
func (s Sum) String() string {
|
|
return hex.EncodeToString(s.sum[:])
|
|
}
|
|
|
|
var (
|
|
once sync.Once
|
|
seed uint64
|
|
)
|
|
|
|
// Hash returns the hash of v.
|
|
func (h *hasher) Hash(v interface{}) (hash Sum) {
|
|
h.bw.Flush()
|
|
h.h.Reset()
|
|
once.Do(func() {
|
|
seed = uint64(time.Now().UnixNano())
|
|
})
|
|
h.uint(seed)
|
|
h.print(reflect.ValueOf(v))
|
|
h.bw.Flush()
|
|
// Sum into scratch & copy out, as hash.Hash is an interface
|
|
// so the slice necessarily escapes, and there's no sha256
|
|
// concrete type exported and we don't want the 'hash' result
|
|
// parameter to escape to the heap:
|
|
h.h.Sum(h.scratch[:0])
|
|
copy(hash.sum[:], h.scratch[:])
|
|
return
|
|
}
|
|
|
|
var hasherPool = &sync.Pool{
|
|
New: func() interface{} { return newHasher() },
|
|
}
|
|
|
|
// Hash returns the hash of v.
|
|
func Hash(v interface{}) Sum {
|
|
h := hasherPool.Get().(*hasher)
|
|
defer hasherPool.Put(h)
|
|
for k := range h.visited {
|
|
delete(h.visited, k)
|
|
}
|
|
return h.Hash(v)
|
|
}
|
|
|
|
// Update sets last to the hash of v and reports whether its value changed.
|
|
func Update(last *Sum, v ...interface{}) (changed bool) {
|
|
sum := Hash(v)
|
|
if sum == *last {
|
|
// unchanged.
|
|
return false
|
|
}
|
|
*last = sum
|
|
return true
|
|
}
|
|
|
|
var appenderToType = reflect.TypeOf((*appenderTo)(nil)).Elem()
|
|
|
|
type appenderTo interface {
|
|
AppendTo([]byte) []byte
|
|
}
|
|
|
|
func (h *hasher) uint(i uint64) {
|
|
binary.BigEndian.PutUint64(h.scratch[:8], i)
|
|
h.bw.Write(h.scratch[:8])
|
|
}
|
|
|
|
func (h *hasher) int(i int) {
|
|
binary.BigEndian.PutUint64(h.scratch[:8], uint64(i))
|
|
h.bw.Write(h.scratch[:8])
|
|
}
|
|
|
|
var uint8Type = reflect.TypeOf(byte(0))
|
|
|
|
// print hashes v into w.
|
|
// It reports whether it was able to do so without hitting a cycle.
|
|
func (h *hasher) print(v reflect.Value) (acyclic bool) {
|
|
if !v.IsValid() {
|
|
return true
|
|
}
|
|
|
|
w := h.bw
|
|
visited := h.visited
|
|
|
|
if v.CanInterface() {
|
|
// Use AppendTo methods, if available and cheap.
|
|
if v.CanAddr() && v.Type().Implements(appenderToType) {
|
|
a := v.Addr().Interface().(appenderTo)
|
|
scratch := a.AppendTo(h.scratch[:0])
|
|
w.Write(scratch)
|
|
return true
|
|
}
|
|
}
|
|
|
|
// Generic handling.
|
|
switch v.Kind() {
|
|
default:
|
|
panic(fmt.Sprintf("unhandled kind %v for type %v", v.Kind(), v.Type()))
|
|
case reflect.Ptr:
|
|
ptr := v.Pointer()
|
|
if visited[ptr] {
|
|
return false
|
|
}
|
|
visited[ptr] = true
|
|
return h.print(v.Elem())
|
|
case reflect.Struct:
|
|
acyclic = true
|
|
w.WriteString("struct")
|
|
h.int(v.NumField())
|
|
for i, n := 0, v.NumField(); i < n; i++ {
|
|
h.int(i)
|
|
if !h.print(v.Field(i)) {
|
|
acyclic = false
|
|
}
|
|
}
|
|
return acyclic
|
|
case reflect.Slice, reflect.Array:
|
|
vLen := v.Len()
|
|
if v.Kind() == reflect.Slice {
|
|
h.int(vLen)
|
|
}
|
|
if v.Type().Elem() == uint8Type && v.CanInterface() {
|
|
if vLen > 0 && vLen <= scratchSize {
|
|
// If it fits in scratch, avoid the Interface allocation.
|
|
// It seems tempting to do this for all sizes, doing
|
|
// scratchSize bytes at a time, but reflect.Slice seems
|
|
// to allocate, so it's not a win.
|
|
n := reflect.Copy(reflect.ValueOf(&h.scratch).Elem(), v)
|
|
w.Write(h.scratch[:n])
|
|
return true
|
|
}
|
|
fmt.Fprintf(w, "%s", v.Interface())
|
|
return true
|
|
}
|
|
acyclic = true
|
|
for i := 0; i < vLen; i++ {
|
|
h.int(i)
|
|
if !h.print(v.Index(i)) {
|
|
acyclic = false
|
|
}
|
|
}
|
|
return acyclic
|
|
case reflect.Interface:
|
|
return h.print(v.Elem())
|
|
case reflect.Map:
|
|
// TODO(bradfitz): ideally we'd avoid these map
|
|
// operations to detect cycles if we knew from the map
|
|
// element type that there no way to form a cycle,
|
|
// which is the common case. Notably, we don't care
|
|
// about hashing the same map+contents twice in
|
|
// different parts of the tree. In fact, we should
|
|
// ideally. (And this prevents it) We should only stop
|
|
// hashing when there's a cycle. What we should
|
|
// probably do is make sure we enumerate the data
|
|
// structure tree is a fixed order and then give each
|
|
// pointer an increasing number, and when we hit a
|
|
// dup, rather than emitting nothing, we should emit a
|
|
// "value #12" reference. Which implies that all things
|
|
// emit to the bufio.Writer should be type-tagged so
|
|
// we can distinguish loop references without risk of
|
|
// collisions.
|
|
ptr := v.Pointer()
|
|
if visited[ptr] {
|
|
return false
|
|
}
|
|
visited[ptr] = true
|
|
|
|
if h.hashMapAcyclic(v) {
|
|
return true
|
|
}
|
|
return h.hashMapFallback(v)
|
|
case reflect.String:
|
|
h.int(v.Len())
|
|
w.WriteString(v.String())
|
|
case reflect.Bool:
|
|
w.Write(strconv.AppendBool(h.scratch[:0], v.Bool()))
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
w.Write(strconv.AppendInt(h.scratch[:0], v.Int(), 10))
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
|
h.uint(v.Uint())
|
|
case reflect.Float32, reflect.Float64:
|
|
w.Write(strconv.AppendUint(h.scratch[:0], math.Float64bits(v.Float()), 10))
|
|
case reflect.Complex64, reflect.Complex128:
|
|
fmt.Fprintf(w, "%v", v.Complex())
|
|
}
|
|
return true
|
|
}
|
|
|
|
type mapHasher struct {
|
|
xbuf [sha256.Size]byte // XOR'ed accumulated buffer
|
|
ebuf [sha256.Size]byte // scratch buffer
|
|
s256 hash.Hash // sha256 hash.Hash
|
|
bw *bufio.Writer // to hasher into ebuf
|
|
val valueCache // re-usable values for map iteration
|
|
iter *reflect.MapIter // re-usable map iterator
|
|
}
|
|
|
|
func (mh *mapHasher) Reset() {
|
|
for i := range mh.xbuf {
|
|
mh.xbuf[i] = 0
|
|
}
|
|
}
|
|
|
|
func (mh *mapHasher) startEntry() {
|
|
for i := range mh.ebuf {
|
|
mh.ebuf[i] = 0
|
|
}
|
|
mh.bw.Flush()
|
|
mh.s256.Reset()
|
|
}
|
|
|
|
func (mh *mapHasher) endEntry() {
|
|
mh.bw.Flush()
|
|
for i, b := range mh.s256.Sum(mh.ebuf[:0]) {
|
|
mh.xbuf[i] ^= b
|
|
}
|
|
}
|
|
|
|
var mapHasherPool = &sync.Pool{
|
|
New: func() interface{} {
|
|
mh := new(mapHasher)
|
|
mh.s256 = sha256.New()
|
|
mh.bw = bufio.NewWriter(mh.s256)
|
|
mh.val = make(valueCache)
|
|
mh.iter = new(reflect.MapIter)
|
|
return mh
|
|
},
|
|
}
|
|
|
|
type valueCache map[reflect.Type]reflect.Value
|
|
|
|
func (c valueCache) get(t reflect.Type) reflect.Value {
|
|
v, ok := c[t]
|
|
if !ok {
|
|
v = reflect.New(t).Elem()
|
|
c[t] = v
|
|
}
|
|
return v
|
|
}
|
|
|
|
// hashMapAcyclic is the faster sort-free version of map hashing. If
|
|
// it detects a cycle it returns false and guarantees that nothing was
|
|
// written to w.
|
|
func (h *hasher) hashMapAcyclic(v reflect.Value) (acyclic bool) {
|
|
mh := mapHasherPool.Get().(*mapHasher)
|
|
defer mapHasherPool.Put(mh)
|
|
mh.Reset()
|
|
iter := mapIter(mh.iter, v)
|
|
defer mapIter(mh.iter, reflect.Value{}) // avoid pinning v from mh.iter when we return
|
|
|
|
// Temporarily switch to the map hasher's bufio.Writer.
|
|
oldw := h.setBufioWriter(mh.bw)
|
|
defer h.setBufioWriter(oldw)
|
|
|
|
k := mh.val.get(v.Type().Key())
|
|
e := mh.val.get(v.Type().Elem())
|
|
for iter.Next() {
|
|
key := iterKey(iter, k)
|
|
val := iterVal(iter, e)
|
|
mh.startEntry()
|
|
if !h.print(key) {
|
|
return false
|
|
}
|
|
if !h.print(val) {
|
|
return false
|
|
}
|
|
mh.endEntry()
|
|
}
|
|
oldw.Write(mh.xbuf[:])
|
|
return true
|
|
}
|
|
|
|
func (h *hasher) hashMapFallback(v reflect.Value) (acyclic bool) {
|
|
acyclic = true
|
|
sm := newSortedMap(v)
|
|
w := h.bw
|
|
fmt.Fprintf(w, "map[%d]{\n", len(sm.Key))
|
|
for i, k := range sm.Key {
|
|
if !h.print(k) {
|
|
acyclic = false
|
|
}
|
|
w.WriteString(": ")
|
|
if !h.print(sm.Value[i]) {
|
|
acyclic = false
|
|
}
|
|
w.WriteString("\n")
|
|
}
|
|
w.WriteString("}\n")
|
|
return acyclic
|
|
}
|