// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package taildrop

import (
	"bytes"
	"io"
	"math/rand"
	"os"
	"testing"
	"testing/iotest"

	"tailscale.com/util/must"
)

func TestResume(t *testing.T) {
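	// Shrink the block size so the 12345-byte payload below spans many
	// blocks, exercising multi-block hashing and resumption.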
	oldBlockSize := blockSize
	defer func() { blockSize = oldBlockSize }()
	blockSize = 256

	m := ManagerOptions{Logf: t.Logf, Dir: t.TempDir()}.New()
	defer m.Shutdown()
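
	// Deterministic pseudo-random payload, reproducible across runs.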
	rn := rand.New(rand.NewSource(0))
	want := make([]byte, 12345)
	must.Get(io.ReadFull(rn, want))

	// Resuming is a dance between the client and server:
	//   1. the client requests hashes for a partial file,
	//   2. the server then computes those hashes,
	//   3. the client computes hashes locally and compares them,
	//   4. goto 1 while the partial file still has data.
	// While step 2 is running, the client sits idle, and while step 3 is
	// running, the server sits idle. By streaming each block hash over
	// immediately after the server computes it, the client can start
	// checking that hash while the server works on the next one (in a
	// pipelined manner). This performs dramatically better and also uses
	// less memory, since neither side holds a list of hashes, only one
	// hash at a time.
	// There are two detriments to this approach:
	//   - The HTTP API relies on a JSON stream, which is not a standard
	//     REST-like pattern. However, since we implement both the client
	//     and the server, this is fine.
	//   - While the stream is ongoing, the server holds an open handle on
	//     the file being hashed; on really slow streams, this could hold
	//     a file open forever.
	// See tailscale/corp#14772.
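
	// resume-noexist: resuming against a partial file that does not yet
	// exist should find nothing to hash, so the transfer starts from
	// offset zero and uploads the entire content.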
	t.Run("resume-noexist", func(t *testing.T) {
		r := io.Reader(bytes.NewReader(want))

		next, close, err := m.HashPartialFile("", "foo")
		must.Do(err)
		defer close()
		offset, r, err := ResumeReader(r, next)
		must.Do(err)
		must.Do(close()) // Windows wants the file handle to be closed to rename it.

		must.Get(m.PutFile("", "foo", r, offset, -1))
		got := must.Get(os.ReadFile(must.Get(joinDir(m.opts.Dir, "foo"))))
		if !bytes.Equal(got, want) {
			t.Errorf("content mismatch")
		}
	})
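
	// resume-retry: repeatedly interrupt the upload partway through and
	// resume from the hashed prefix until a PutFile finally succeeds.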
t.Run("resume-retry", func(t *testing.T) {
|
|
|
|
rn := rand.New(rand.NewSource(0))
|
2023-10-19 02:07:30 +01:00
|
|
|
for i := 0; true; i++ {
|
2023-10-13 00:50:11 +01:00
|
|
|
r := io.Reader(bytes.NewReader(want))
|
2023-10-19 02:07:30 +01:00
|
|
|
|
|
|
|
next, close, err := m.HashPartialFile("", "bar")
			must.Do(err)
			defer close()
			offset, r, err := ResumeReader(r, next)
			must.Do(err)
			must.Do(close()) // Windows wants the file handle to be closed to rename it.
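
			// Interrupt the upload partway through: allow only a random
			// number of bytes (at most 1000) past the resume offset to be
			// copied, then inject an error.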
			numWant := rn.Int63n(min(int64(len(want))-offset, 1000) + 1)
			if offset < int64(len(want)) {
				r = io.MultiReader(io.LimitReader(r, numWant), iotest.ErrReader(io.ErrClosedPipe))
			}
			if _, err := m.PutFile("", "bar", r, offset, -1); err == nil {
				break
			}
			if i > 1000 {
				t.Fatalf("too many iterations to complete the test")
			}
		}
		got := must.Get(os.ReadFile(must.Get(joinDir(m.opts.Dir, "bar"))))
		if !bytes.Equal(got, want) {
			t.Errorf("content mismatch")
		}
	})
}
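
// resumeOffsetSketch is a minimal, self-contained sketch of the
// client-side comparison described in the comment above the subtests.
// It is an illustration only, not the package's real ResumeReader: the
// hashBlock and nextServerHash callback shapes are assumptions made for
// this example, with the caller supplying the hash function (e.g.
// sha256.Sum256) and an iterator over the server's streamed block
// hashes. The client consumes one server block hash at a time, hashes
// the same block of its local copy, and stops at the first
// disagreement; the returned offset is the length of the prefix both
// sides already agree on.
func resumeOffsetSketch(
	local io.Reader,
	hashBlock func([]byte) [32]byte,
	nextServerHash func() (sum [32]byte, n int64, err error),
) (offset int64, err error) {
	buf := make([]byte, blockSize)
	for {
		serverSum, n, err := nextServerHash()
		if err == io.EOF {
			return offset, nil // server has no more blocks; resume here
		}
		if err != nil {
			return offset, err
		}
		// Hash the corresponding block of the local copy.
		if _, err := io.ReadFull(local, buf[:n]); err != nil {
			return offset, nil // local copy is shorter; resume here
		}
		if hashBlock(buf[:n]) != serverSum {
			return offset, nil // blocks diverge; resume from last agreement
		}
		offset += n
	}
}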