From 124363e0caef2dd79f5915d31742cf9ca6bac48d Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 26 Nov 2021 16:39:40 -0800 Subject: [PATCH] net/dns: detect and decode UTF-16 from wsl.exe earlier. Fixes #3170 Signed-off-by: David Anderson --- net/dns/utf.go | 56 ++++++++++++++++++++++++++++++++++++++++++ net/dns/utf_test.go | 25 +++++++++++++++++++ net/dns/wsl_windows.go | 43 ++++---------------------------- 3 files changed, 86 insertions(+), 38 deletions(-) create mode 100644 net/dns/utf.go create mode 100644 net/dns/utf_test.go diff --git a/net/dns/utf.go b/net/dns/utf.go new file mode 100644 index 000000000..dc55590db --- /dev/null +++ b/net/dns/utf.go @@ -0,0 +1,56 @@ +// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package dns + +// This code is only used in Windows builds, but is in an +// OS-independent file so tests can run all the time. + +import ( + "bytes" + "encoding/binary" + "unicode/utf16" +) + +// maybeUnUTF16 tries to detect whether bs contains UTF-16, and if so +// translates it to regular UTF-8. +// +// Some of wsl.exe's output gets printed as UTF-16, which breaks a +// bunch of things. Try to detect this by looking for a zero byte in +// the first few bytes of output (which will appear if any of those +// codepoints are basic ASCII - very likely). From that we can infer +// that UTF-16 is being printed, and the byte order in use, and we +// decode that back to UTF-8. +// +// https://github.com/microsoft/WSL/issues/4607 +func maybeUnUTF16(bs []byte) []byte { + if len(bs)%2 != 0 { + // Can't be complete UTF-16. + return bs + } + checkLen := 20 + if len(bs) < checkLen { + checkLen = len(bs) + } + zeroOff := bytes.IndexByte(bs[:checkLen], 0) + if zeroOff == -1 { + return bs + } + + // We assume wsl.exe is trying to print an ASCII codepoint, + // meaning the zero byte is in the upper 8 bits of the + // codepoint. 
That means we can use the zero's byte offset to + // work out if we're seeing little-endian or big-endian + // UTF-16. + var endian binary.ByteOrder = binary.LittleEndian + if zeroOff%2 == 0 { + endian = binary.BigEndian + } + + var u16 []uint16 + for i := 0; i < len(bs); i += 2 { + u16 = append(u16, endian.Uint16(bs[i:])) + } + return []byte(string(utf16.Decode(u16))) +} diff --git a/net/dns/utf_test.go b/net/dns/utf_test.go new file mode 100644 index 000000000..fbe228248 --- /dev/null +++ b/net/dns/utf_test.go @@ -0,0 +1,25 @@ +// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package dns + +import "testing" + +func TestMaybeUnUTF16(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"abc", "abc"}, // UTF-8 + {"a\x00b\x00c\x00", "abc"}, // UTF-16-LE + {"\x00a\x00b\x00c", "abc"}, // UTF-16-BE + } + + for _, test := range tests { + got := string(maybeUnUTF16([]byte(test.in))) + if got != test.want { + t.Errorf("maybeUnUTF16(%q) = %q, want %q", test.in, got, test.want) + } + } +} diff --git a/net/dns/wsl_windows.go b/net/dns/wsl_windows.go index 9706af04a..227e378ef 100644 --- a/net/dns/wsl_windows.go +++ b/net/dns/wsl_windows.go @@ -12,7 +12,6 @@ import ( "os/user" "strings" "syscall" - "unicode/utf16" "golang.org/x/sys/windows" "tailscale.com/types/logger" @@ -26,29 +25,7 @@ func wslDistros() ([]string, error) { return nil, fmt.Errorf("%v: %q", err, string(b)) } - // The first line of output is a WSL header. E.g. - // - // C:\tsdev>wsl.exe -l - // Windows Subsystem for Linux Distributions: - // Ubuntu-20.04 (Default) - // - // We can skip it by passing '-q', but here we put it to work. - // It turns out wsl.exe -l is broken, and outputs UTF-16 names - // that nothing can read. (Try `wsl.exe -l | more`.) - // So we look at the header to see if it's UTF-16. - // If so, we run the rest through a UTF-16 parser. 
- // - // https://github.com/microsoft/WSL/issues/4607 - var output string - if bytes.HasPrefix(b, []byte("W\x00i\x00n\x00d\x00o\x00w\x00s\x00")) { - output, err = decodeUTF16(b) - if err != nil { - return nil, fmt.Errorf("failed to decode wsl.exe -l output %q: %v", b, err) - } - } else { - output = string(b) - } - lines := strings.Split(output, "\n") + lines := strings.Split(string(b), "\n") if len(lines) < 1 { return nil, nil } @@ -66,19 +43,6 @@ func wslDistros() ([]string, error) { return distros, nil } -func decodeUTF16(b []byte) (string, error) { - if len(b) == 0 { - return "", nil - } else if len(b)%2 != 0 { - return "", fmt.Errorf("decodeUTF16: invalid length %d", len(b)) - } - var u16 []uint16 - for i := 0; i < len(b); i += 2 { - u16 = append(u16, uint16(b[i])+(uint16(b[i+1])<<8)) - } - return string(utf16.Decode(u16)), nil -} - // wslManager is a DNS manager for WSL2 linux distributions. // It configures /etc/wsl.conf and /etc/resolv.conf. type wslManager struct { @@ -225,7 +189,10 @@ func wslCombinedOutput(cmd *exec.Cmd) ([]byte, error) { cmd.Stdout = buf cmd.Stderr = buf err := wslRun(cmd) - return buf.Bytes(), err + if err != nil { + return nil, err + } + return maybeUnUTF16(buf.Bytes()), nil } func wslRun(cmd *exec.Cmd) (err error) {