// String returns a prefix of the UTF-8 string s whose length is no greater
// than n bytes.
//
// If n >= len(s), s is returned unmodified. If n <= 0, the empty string is
// returned; in particular a negative n does not panic. Otherwise s is cut at
// a byte offset ≤ n chosen so that, for valid UTF-8 input, the result does
// not end in a partial multi-byte encoding.
//
// The cut is conservative: when the byte just before offset n belongs to a
// multi-byte encoding, that whole encoding is dropped, even if it happens to
// end exactly at n. The result may therefore be shorter than the longest
// valid prefix that would fit in n bytes.
func String(s string, n int) string {
	if n >= len(s) {
		return s
	}
	if n <= 0 {
		// Nothing fits. Without this guard a negative n would skip both
		// checks below and panic in the final slice expression.
		return ""
	}

	// Back up until we find the beginning of a UTF-8 encoding.
	for n > 0 && s[n-1]&0xc0 == 0x80 { // 0b10... is a continuation byte
		n--
	}

	// If we're at the beginning of a multi-byte encoding, back up one more to
	// skip it. It's possible the value was already complete, but it's simpler
	// if we only have to check in one direction.
	//
	// Otherwise, we have a single-byte code (0b00... or 0b01...).
	if n > 0 && s[n-1]&0xc0 == 0xc0 { // 0b11... starts a multi-byte encoding
		n--
	}
	return s[:n]
}