// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package taildrop

import (
	"bytes"
	"io"
	"math/rand"
	"os"
	"testing"
	"testing/iotest"

	"tailscale.com/util/must"
)

func TestResume(t *testing.T) {
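	// Shrink the block size so the 12345-byte payload below spans many
	// blocks, exercising multi-block hashing and resumption.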
	oldBlockSize := blockSize
	defer func() { blockSize = oldBlockSize }()
	blockSize = 256

	m := ManagerOptions{Logf: t.Logf, Dir: t.TempDir()}.New()
	defer m.Shutdown()
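
	// Deterministic pseudo-random payload, reproducible across runs.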
	rn := rand.New(rand.NewSource(0))
	want := make([]byte, 12345)
	must.Get(io.ReadFull(rn, want))

	// Resuming is a dance between the client and server:
	//   1. the client requests hashes for a partial file,
	//   2. the server then computes those hashes,
	//   3. the client computes hashes locally and compares them,
	//   4. goto 1 while the partial file still has data.
	// While step 2 is running, the client sits idle, and while step 3 is
	// running, the server sits idle. By streaming each block hash over
	// immediately after the server computes it, the client can start
	// checking that hash while the server works on the next one (in a
	// pipelined manner). This performs dramatically better and also uses
	// less memory, since neither side holds a list of hashes, only one
	// hash at a time.
	// There are two detriments to this approach:
	//   - The HTTP API relies on a JSON stream, which is not a standard
	//     REST-like pattern. However, since we implement both the client
	//     and the server, this is fine.
	//   - While the stream is ongoing, the server holds an open handle on
	//     the file being hashed; on really slow streams, this could hold
	//     a file open forever.
	// See tailscale/corp#14772.
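
	// resume-noexist: resuming against a partial file that does not yet
	// exist should find nothing to hash, so the transfer starts from
	// offset zero and uploads the entire content.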
	t.Run("resume-noexist", func(t *testing.T) {
		r := io.Reader(bytes.NewReader(want))

		next, close, err := m.HashPartialFile("", "foo")
		must.Do(err)
		defer close()
		offset, r, err := ResumeReader(r, next)
		must.Do(err)
		must.Do(close()) // Windows wants the file handle to be closed to rename it.

		must.Get(m.PutFile("", "foo", r, offset, -1))
		got := must.Get(os.ReadFile(must.Get(joinDir(m.opts.Dir, "foo"))))
		if !bytes.Equal(got, want) {
			t.Errorf("content mismatch")
		}
	})
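
	// resume-retry: repeatedly interrupt the upload partway through and
	// resume from the hashed prefix until a PutFile finally succeeds.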
t.Run("resume-retry", func(t *testing.T) {
|
|
|
|
rn := rand.New(rand.NewSource(0))
|
2023-10-19 02:07:30 +01:00
|
|
|
for i := 0; true; i++ {
|
2023-10-13 00:50:11 +01:00
|
|
|
r := io.Reader(bytes.NewReader(want))
|
2023-10-19 02:07:30 +01:00
|
|
|
|
|
|
|
next, close, err := m.HashPartialFile("", "bar")
			must.Do(err)
			defer close()
			offset, r, err := ResumeReader(r, next)
			must.Do(err)
			must.Do(close()) // Windows wants the file handle to be closed to rename it.
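
			// Interrupt the upload partway through: allow only a random
			// number of bytes (at most 1000) past the resume offset to be
			// copied, then inject an error.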
			numWant := rn.Int63n(min(int64(len(want))-offset, 1000) + 1)
			if offset < int64(len(want)) {
				r = io.MultiReader(io.LimitReader(r, numWant), iotest.ErrReader(io.ErrClosedPipe))
			}
			if _, err := m.PutFile("", "bar", r, offset, -1); err == nil {
				break
			}
			if i > 1000 {
				t.Fatalf("too many iterations to complete the test")
			}
		}
		got := must.Get(os.ReadFile(must.Get(joinDir(m.opts.Dir, "bar"))))
		if !bytes.Equal(got, want) {
			t.Errorf("content mismatch")
		}
	})
}
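
// resumeOffsetSketch is a minimal, self-contained sketch of the
// client-side comparison described in the comment above the subtests.
// It is an illustration only, not the package's real ResumeReader: the
// hashBlock and nextServerHash callback shapes are assumptions made for
// this example, with the caller supplying the hash function (e.g.
// sha256.Sum256) and an iterator over the server's streamed block
// hashes. The client consumes one server block hash at a time, hashes
// the same block of its local copy, and stops at the first
// disagreement; the returned offset is the length of the prefix both
// sides already agree on.
func resumeOffsetSketch(
	local io.Reader,
	hashBlock func([]byte) [32]byte,
	nextServerHash func() (sum [32]byte, n int64, err error),
) (offset int64, err error) {
	buf := make([]byte, blockSize)
	for {
		serverSum, n, err := nextServerHash()
		if err == io.EOF {
			return offset, nil // server has no more blocks; resume here
		}
		if err != nil {
			return offset, err
		}
		// Hash the corresponding block of the local copy.
		if _, err := io.ReadFull(local, buf[:n]); err != nil {
			return offset, nil // local copy is shorter; resume here
		}
		if hashBlock(buf[:n]) != serverSum {
			return offset, nil // blocks diverge; resume from last agreement
		}
		offset += n
	}
}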