tstest/integration: add initial work on NixOS VM-based integration tests

Andrew Dunham 2023-12-18 17:29:37 -05:00
# To run:
# 1. Put a Tailscale auth key for the test tailnet in ./tailscale-test.key
# 2. Run the Nix test:
# nix-build --show-trace --option sandbox false ./pmp-epoch.nix
# 3. On success, the command builds and exits successfully.
# 4. On an error, the command exits with a non-zero exit code and prints the error; for example:
# error: builder for '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-vm-test-run-tailscale-test.drv' failed with exit code 1;
# last 10 log lines:
# > Test "Foo" failed with error: "bar"
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
pkgs = import nixpkgs { config = {}; overlays = []; };
inherit (pkgs) lib;
# Set debug = true to do the following:
# 1. Boots VMs sequentially so that output isn't interleaved
# 2. Prints the output of various commands
# 3. Enables debug logging from tailscaled (--verbose 2)
debug = true;
authKey = lib.fileContents ./tailscale-test.key;
# 'usePatched' selects between the Tailscale package as shipped by nixpkgs
# (false) and the pinned build described by 'tsOverlay' below (true).
usePatched = true;
# Nixpkgs overlay (added to the client node's nixpkgs.overlays). When
# usePatched is false it evaluates to an empty attrset and changes nothing;
# otherwise it overrides the tailscale package so that buildGoModule is called
# with the attributes below merged over the package's own arguments ('//'
# lets the right-hand side win).
tsOverlay = self: super: if (!usePatched) then {} else {
tailscale = super.tailscale.override {
buildGoModule = args: super.buildGoModule (args // {
version = "2024-01-04";
src = super.pkgs.fetchFromGitHub {
owner = "tailscale";
repo = "tailscale";
rev = "10c595d962a43fa1c01642e1ea295b7eb98e74a6";
hash = "sha256-tY3kxXtvz/Bw05yYeZvRe5Laz7Js2exwzXCWWVCKAG8=";
vendorHash = "sha256-uMVRdgO/HTs0CKqWPUFEL/rFvzio1vblTUaz5Cgi+5Q=";
in pkgs.nixosTest {
name = "pmp-epoch";
nodes = {
# This is our fake "router" that runs miniupnp
router = { config, lib, pkgs, ... }: {
networking.nameservers = [ "" "" ];
# Trust the internal interface so that portmapping packets aren't blocked.
networking.firewall.trustedInterfaces = [ "eth1" ];
environment.systemPackages = with pkgs; [
services.miniupnpd = {
enable = true;
externalInterface = "eth0";
internalIPs = [ "eth1" ];
upnp = false;
natpmp = true;
# We need to provide an external IP to portmap to; we could use STUN to
# discover what the "real" IP is, but that doesn't work in all cases
# and we don't actually care what it is.
appendConfig = ''
# NAT from our eth1 internal interface to the external eth0.
networking.nat = {
enable = true;
internalIPs = [ "" ];
externalInterface = "eth0";
client = { config, lib, pkgs, ... }: {
networking.nameservers = [ "" "" ];
nixpkgs.overlays = [ tsOverlay ];
services.tailscale = {
enable = true;
authKeyFile = pkgs.writeText "ts.key" authKey;
environment.systemPackages = with pkgs; [ iproute2 ];
# Don't start Tailscale automatically; we need to start it only after we
# take eth0 down on boot.
systemd.services = {
tailscaled-autoconnect.wantedBy = lib.mkForce [];
tailscaled.wantedBy = lib.mkForce [];
testScript = ''
debug = ${if debug then "True" else "False"}
def dprint(*args, **kwargs):
    """Behave like print(), but stay silent unless the script-level debug flag is set."""
    if not debug:
        return
    print(*args, **kwargs)
def print_network_debug(client):
    """Print the node's addresses, routes, and resolv.conf inside one subtest.

    Each command is run on `client` with succeed(), so a failing command
    raises instead of being skipped.
    """
    # NOTE(review): `ip route get` is given no destination here and relies on
    # the trailing `|| true`; confirm whether a target address was intended.
    diagnostics = (
        "ip addr",
        "ip route",
        "echo 'route get' && ip route get || true",
        "echo 'resolv.conf' && cat /etc/resolv.conf",
    )
    with subtest("Network Debugging Information"):
        for command in diagnostics:
            print(client.succeed(command))
def wait_and_get_ts_ip(client):
    """Block until the node reports a Tailscale IPv4 address, then return it.

    Args:
        client: a test-driver machine object providing wait_until_succeeds()
            and succeed().

    Returns:
        The output of `tailscale ip -4` with surrounding whitespace stripped.
    """
    # NOTE: can't wait for "tailscaled-autoconnect.service" since a oneshot
    # service never actually "starts"; instead, poll until `tailscale ip -4`
    # prints something shaped like a dotted-quad address.
    # NOTE(review): the earlier comment also mentioned waiting for
    # multi-user.target, but no such wait is performed in this function;
    # confirm whether one is needed.
    #
    # This is a raw string on purpose: the script lives in a Nix indented
    # string, which hands backslashes to Python untouched, so a plain "\."
    # would be an invalid escape sequence (a SyntaxWarning on Python 3.12+).
    # `grep -E` replaces the obsolescent `egrep` alias.
    client.wait_until_succeeds(
        r"tailscale ip -4 | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'"
    )

    # Re-read the address now that one is known to exist.
    return client.succeed("tailscale ip -4").strip()
# Start the router first
# Wait for an IP, then restart miniupnpd to ensure that it knows about our external IP
router.wait_until_succeeds("ip addr show dev eth0 | grep '10.0.2.'")
router.succeed("systemctl restart miniupnpd.service")
# Start the client
# Disable the eth0 interface for the client and set up a route through our router.
with subtest("Route traffic through eth1"):
client.succeed("ip link set eth0 down")
client.succeed("ip route add default via dev eth1 src")
client.succeed("ping -c1")
# Start Tailscale
with subtest("Start Tailscale"):
client.succeed("systemctl start tailscaled.service")
client.succeed("systemctl start tailscaled-autoconnect.service")
client_ip = wait_and_get_ts_ip(client)
dprint(f"client Tailscale IP: {client_ip}")
# Run the portmap debug command from the client and verify that we have NAT-PMP support.
with subtest("Portmapping"):
portmap = client.succeed("tailscale debug portmap").strip()
assert "PMP:true" in portmap, f"Tailscale portmap output does not have NAT-PMP support:\n{portmap}"
# TODO(andrew-d): we should restart miniupnpd and then verify we re-acquire
# a lease due to the epoch decreasing

# Setup: ensure the test tailnet has a SplitDNS entry for the 'testDomain'
# domain, below, set to the nameserver address in the 'addrs' attrset.
# To run:
# 1. Put a Tailscale auth key for the test tailnet in ./tailscale-test.key
# 2. Run the Nix test:
# nix-build --show-trace --option sandbox false ./tailscale-test.nix
# 3. On success, the command builds and exits successfully.
# 4. On an error, the command exits with a non-zero exit code and prints the error; for example:
# error: builder for '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-vm-test-run-tailscale-test.drv' failed with exit code 1;
# last 10 log lines:
# > Test "Verify the client can make a request to a SplitDNS domain" failed with error: "command `curl --fail -vv --proxy socks5h://localhost:1055 http://bla.foo.bar/file.txt` failed (exit code 97)"
# The 'usePatched' variable controls whether to use the standard NixOS upstream
# Tailscale build, or a patched version specified in 'tsOverlay'. This is
# useful for testing out a fix or patch for an issue.
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
pkgs = import nixpkgs { config = {}; overlays = []; };
inherit (pkgs) lib;
# Set debug = true to do the following:
# 1. Boots VMs sequentially so that output isn't interleaved
# 2. Prints the output of various commands
# 3. Enables debug logging from tailscaled (--verbose 2)
debug = false;
authKey = lib.fileContents ./tailscale-test.key;
# These are addresses assigned by the host; see 'ipInterfaces' in
# nixos/lib/testing/network.nix for more details
addrs = {
nameserver = "";
tunclient = "";
userclient = "";
webserver = "";
socksPort = 1055;
testDomain = "foo.bar";
queryAddr = "bla.${testDomain}";
usePatched = false;
tsOverlay = self: super: if (!usePatched) then {} else {
tailscale = super.tailscale.override {
buildGoModule = args: super.buildGoModule (args // {
version = "2023-12-13";
src = super.pkgs.fetchFromGitHub {
owner = "itszero";
repo = "tailscale";
rev = "5cb309e8880ffa067975392b5c1493a660b301f1";
hash = "sha256-sOTknrJ09P/4rG/YZQ7BhapVr6FN0rjaD/IwemSHXHs=";
vendorHash = "sha256-Y7Z72ZwTcsdeI8DTqc6kDBlYNvQjNsRgD4D3fTsBoiQ=";
in pkgs.nixosTest {
name = "splitdns";
nodes = {
# This is the nameserver that we're querying.
nameserver = { config, lib, ... } : {
networking.firewall.allowedUDPPorts = [ 53 ];
networking.firewall.allowedTCPPorts = [ 53 ];
environment.systemPackages = with pkgs; [ dnsutils ];
services.dnsmasq = {
enable = true;
resolveLocalQueries = false;
settings = {
"domain-needed" = true;
"bogus-priv" = true;
"expand-hosts" = true;
"listen-address" = [ "" addrs.nameserver ];
"bind-interfaces" = true;
"server" = ["" ""];
"address=/${queryAddr}/${addrs.webserver}" = true;
# This is a basic webserver that our nameserver points to.
webserver = { config, lib, pkgs, ... } : {
networking.firewall.allowedTCPPorts = [ 80 443 ];
services.lighttpd = {
enable = true;
document-root = pkgs.runCommand "document-root" {} ''
mkdir -p "$out"
echo "i am the webserver" > "$out/file.txt"
# This is the Tailscale client node that makes the query
userclient = { config, lib, pkgs, ... }: {
networking = {
nameservers = [ "" "" ];
environment.systemPackages = with pkgs; [ dnsutils ];
# Use our patched Tailscale
nixpkgs.overlays = [ tsOverlay ];
services.tailscale = {
enable = true;
interfaceName = "userspace-networking"; # redundant due to the ExecStart override below, but for clarity
authKeyFile = pkgs.writeText "ts.key" authKey;
extraUpFlags = [
# Run in userspace-networking mode
systemd.services.tailscaled.serviceConfig.ExecStart = lib.mkForce [
# Clear existing ExecStart
# Override with new one that runs a SOCKS5 server
(lib.concatStringsSep " " ([
"--socks5-server=localhost:${toString socksPort}"
"--port=${toString config.services.tailscale.port}"
] ++ lib.optional debug "--verbose=2"))
tunclient = { config, lib, pkgs, ... }: {
# Use systemd-networkd and systemd-resolved to verify that we can
# correctly program that.
networking = {
useNetworkd = true;
nameservers = [ "" "" ];
systemd.network.enable = true;
services.resolved = {
enable = true;
fallbackDns = [ "" "" ];
# for 'dig'
environment.systemPackages = with pkgs; [ dnsutils ];
# Use our patched Tailscale
nixpkgs.overlays = [ tsOverlay ];
services.tailscale = {
enable = true;
authKeyFile = pkgs.writeText "ts.key" authKey;
extraUpFlags = [
# Run in userspace-networking mode
systemd.services.tailscaled.serviceConfig.ExecStart = lib.mkForce [
# Clear existing ExecStart
# Override with new one; unlike userclient, no SOCKS5 server is started here
(lib.concatStringsSep " " ([
"--port=${toString config.services.tailscale.port}"
] ++ lib.optional debug "--verbose=2"))
testScript = ''
debug = ${if debug then "True" else "False"}
def dprint(*args, **kwargs):
    """Behave like print(), but stay silent unless the script-level debug flag is set."""
    if not debug:
        return
    print(*args, **kwargs)
def print_network_debug(client):
    """Print the node's addresses, routes, and resolv.conf inside one subtest.

    Each command is run on `client` with succeed(), so a failing command
    raises instead of being skipped.
    """
    # NOTE(review): `ip route get` is given no destination here and relies on
    # the trailing `|| true`; confirm whether a target address was intended.
    diagnostics = (
        "ip addr",
        "ip route",
        "echo 'route get' && ip route get || true",
        "echo 'resolv.conf' && cat /etc/resolv.conf",
    )
    with subtest("Network Debugging Information"):
        for command in diagnostics:
            print(client.succeed(command))
if not debug:
# Wait for the webserver to start
res = webserver.succeed("curl --fail http://localhost/file.txt").strip()
assert "i am the webserver" in res, f"bad server response: '{res}'"
# Wait for the nameserver to start
# Verify that our DNS settings (on the nameserver) succeed.
output = nameserver.succeed("dig +short ${queryAddr} @${addrs.nameserver}").strip()
dprint("dig output:", output)
assert output == "${addrs.webserver}", f"bad dig result: '{output}'"
def assert_dns(client, nameserver, addr, want):
    """Assert that `dig` run on `client` resolves `addr` to exactly `want`.

    The lookup is sent to `nameserver` once per dig flag (+ignore, then +tcp);
    the first mismatch fails the assertion and names the flag that was in use.

    Args:
        client: machine object on which `dig` is executed via succeed().
        nameserver: server address placed after `@` on the dig command line.
        addr: name to look up.
        want: expected `dig +short` output after stripping whitespace.
    """
    for dig_flag in ("+ignore", "+tcp"):
        query = f"dig +short {dig_flag} {addr} @{nameserver}"
        answer = client.succeed(query).strip()
        dprint("client dig output:", answer)
        assert answer == want, f"bad dig result with flag '{dig_flag}': '{answer}'"
# Wait for Tailscale to start on the client node
with subtest("userspace-networking"):
if debug:
# NOTE: can't wait for "tailscaled-autoconnect.service" since a oneshot
# service never actually "starts"; wait for multi-user.target and then wait
# until we have a valid IP
userclient.wait_until_succeeds("tailscale ip -4 | egrep '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'")
# Verify that we have an IP address
ip = userclient.succeed("tailscale ip -4")
print("Tailscale IP:", ip)
# Make a request through our SOCKS5 proxy to example.com to verify it succeeds
with subtest("Verify the client can make a request to a non-split domain"):
output = userclient.succeed("curl --fail --silent --show-error --proxy socks5h://localhost:${toString socksPort} http://example.com").strip()
dprint("example.com:", output)
assert "<title>Example Domain</title>" in output, f"bad server response: '{output}'"
with subtest("Verify the client can contact the nameserver"):
assert_dns(userclient, "${addrs.nameserver}", "${queryAddr}", "${addrs.webserver}")
# TODO: this should succeed but does not
if True:
with subtest("Verify the client can make a request to a SplitDNS domain"):
output = userclient.succeed("curl --fail -vv --proxy socks5h://localhost:${toString socksPort} http://${queryAddr}/file.txt").strip()
print("${queryAddr}:", output)
with subtest("TUN mode"):
if debug:
# NOTE: can't wait for "tailscaled-autoconnect.service" since a oneshot
# service never actually "starts"; wait for multi-user.target and then wait
# until we have a valid IP
tunclient.wait_until_succeeds("tailscale ip -4 | egrep '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'")
# Verify that we have an IP address
ip = tunclient.succeed("tailscale ip -4")
print("Tailscale IP:", ip)
# Make a request to example.com to verify it succeeds
with subtest("Verify the client can make a request to a non-split domain"):
output = tunclient.succeed("curl --fail --silent --show-error http://example.com").strip()
dprint("example.com:", output)
assert "<title>Example Domain</title>" in output, f"bad server response: '{output}'"
with subtest("Verify the client can contact the nameserver"):
assert_dns(tunclient, "${addrs.nameserver}", "${queryAddr}", "${addrs.webserver}")
# TODO: this should succeed but does not
if True:
with subtest("Verify the client can make a request to a SplitDNS domain"):
output = tunclient.succeed("curl --fail -vv http://${queryAddr}/file.txt").strip()
print("${queryAddr}:", output)

# To run:
# 1. Put a Tailscale auth key for the test tailnet in ./tailscale-test.key
# 2. Run the Nix test:
# nix-build --show-trace --option sandbox false ./tailscale-test.nix
# 3. On success, the command builds and exits successfully.
# 4. On an error, the command exits with a non-zero exit code and prints the error; for example:
# error: builder for '/nix/store/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-vm-test-run-tailscale-test.drv' failed with exit code 1;
# last 10 log lines:
# > Test "Verify the client can make a request to a SplitDNS domain" failed with error: "command `curl --fail -vv --proxy socks5h://localhost:1055 http://bla.foo.bar/file.txt` failed (exit code 97)"
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
pkgs = import nixpkgs { config = {}; overlays = []; };
inherit (pkgs) lib;
# Set debug = true to do the following:
# 1. Boots VMs sequentially so that output isn't interleaved
# 2. Prints the output of various commands
# 3. Enables debug logging from tailscaled (--verbose 2)
debug = false;
authKey = lib.fileContents ./tailscale-test.key;
testFile = pkgs.writeText "test.txt" ''
This is a test file that we are copying from one client to another via taildrop!
in pkgs.nixosTest {
name = "taildrop";
nodes = {
# This is the Tailscale client node that sends the file via Taildrop
client1 = { config, lib, pkgs, ... }: {
networking.nameservers = [ "" "" ];
services.tailscale = {
enable = true;
authKeyFile = pkgs.writeText "ts.key" authKey;
# TODO: verbosity
client2 = { config, lib, pkgs, ... }: {
networking.nameservers = [ "" "" ];
services.tailscale = {
enable = true;
authKeyFile = pkgs.writeText "ts.key" authKey;
# TODO: verbosity
testScript = ''
import base64
debug = ${if debug then "True" else "False"}
def dprint(*args, **kwargs):
    """Behave like print(), but stay silent unless the script-level debug flag is set."""
    if not debug:
        return
    print(*args, **kwargs)
def print_network_debug(client):
    """Print the node's addresses, routes, and resolv.conf inside one subtest.

    Each command is run on `client` with succeed(), so a failing command
    raises instead of being skipped.
    """
    # NOTE(review): `ip route get` is given no destination here and relies on
    # the trailing `|| true`; confirm whether a target address was intended.
    diagnostics = (
        "ip addr",
        "ip route",
        "echo 'route get' && ip route get || true",
        "echo 'resolv.conf' && cat /etc/resolv.conf",
    )
    with subtest("Network Debugging Information"):
        for command in diagnostics:
            print(client.succeed(command))
def wait_and_get_ts_ip(client):
    """Block until the node reports a Tailscale IPv4 address, then return it.

    Args:
        client: a test-driver machine object providing wait_until_succeeds()
            and succeed().

    Returns:
        The output of `tailscale ip -4` with surrounding whitespace stripped.
    """
    # NOTE: can't wait for "tailscaled-autoconnect.service" since a oneshot
    # service never actually "starts"; instead, poll until `tailscale ip -4`
    # prints something shaped like a dotted-quad address.
    # NOTE(review): the earlier comment also mentioned waiting for
    # multi-user.target, but no such wait is performed in this function;
    # confirm whether one is needed.
    #
    # This is a raw string on purpose: the script lives in a Nix indented
    # string, which hands backslashes to Python untouched, so a plain "\."
    # would be an invalid escape sequence (a SyntaxWarning on Python 3.12+).
    # `grep -E` replaces the obsolescent `egrep` alias.
    client.wait_until_succeeds(
        r"tailscale ip -4 | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'"
    )

    # Re-read the address now that one is known to exist.
    return client.succeed("tailscale ip -4").strip()
if not debug:
# Wait for each client to start
client1_ip = wait_and_get_ts_ip(client1)
client2_ip = wait_and_get_ts_ip(client2)
# Send a file from client1 to client2
source_file = "${testFile}"
with open(source_file, "rb") as f:
contents = f.read()
contents_b64 = base64.b64encode(contents).decode()
client1.copy_from_host(source_file, "/tmp/copied-file.txt")
client1.succeed(f"tailscale file cp /tmp/copied-file.txt {client2_ip}:")
# Grab the file on client2, and then move to host
client2.succeed("mkdir /tmp/taildrop && tailscale file get -wait -verbose /tmp/taildrop")
got_contents_b64 = client2.succeed("cat /tmp/taildrop/copied-file.txt | base64").strip()
got_contents = base64.b64decode(got_contents_b64)
assert contents == got_contents, f"Mismatched contents after Taildrop:\ngot: {got_contents!r}"