metrics: optimize CurrentFDs to not allocate on Linux

It was 50% of our allocs on one of our servers. (!!)

Updates #2784

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2021-09-02 11:49:32 -07:00 committed by Brad Fitzpatrick
parent db3586cd43
commit 99a1c74a6a
4 changed files with 126 additions and 10 deletions

View File

@ -32,7 +32,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/ipn from tailscale.com/cmd/tailscale/cli+
tailscale.com/ipn/ipnstate from tailscale.com/cmd/tailscale/cli+
tailscale.com/kube from tailscale.com/ipn
tailscale.com/metrics from tailscale.com/derp
💣 tailscale.com/metrics from tailscale.com/derp
tailscale.com/net/dnscache from tailscale.com/derp/derphttp
tailscale.com/net/flowtrack from tailscale.com/wgengine/filter+
💣 tailscale.com/net/interfaces from tailscale.com/cmd/tailscale/cli+

View File

@ -112,7 +112,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/logtail from tailscale.com/logpolicy
tailscale.com/logtail/backoff from tailscale.com/control/controlclient+
tailscale.com/logtail/filch from tailscale.com/logpolicy
tailscale.com/metrics from tailscale.com/derp
💣 tailscale.com/metrics from tailscale.com/derp
tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+
tailscale.com/net/dns/resolver from tailscale.com/wgengine+
tailscale.com/net/dnscache from tailscale.com/control/controlclient+

View File

@ -4,10 +4,104 @@
package metrics
import "os"
import (
"fmt"
"log"
"syscall"
"unsafe"
)
func currentFDs() int {
// TODO(bradfitz): do this without so many allocations on Linux.
ents, _ := os.ReadDir("/proc/self/fd")
return len(ents)
fd, err := openProcSelfFD()
if err != nil {
return 0
}
defer syscall.Close(fd)
count := 0
const blockSize = 8 << 10
buf := make([]byte, blockSize) // stack-allocated; doesn't escape
bufp := 0 // starting read position in buf
nbuf := 0 // end valid data in buf
dirent := &syscall.Dirent{}
for {
if bufp >= nbuf {
bufp = 0
nbuf, err = readDirent(fd, buf)
if err != nil {
log.Printf("currentFDs: readDirent: %v", err)
return 0
}
if nbuf <= 0 {
return count
}
}
consumed, name := parseDirEnt(dirent, buf[bufp:nbuf])
bufp += consumed
if len(name) == 0 || string(name) == "." || string(name) == ".." {
continue
}
count++
}
}
func direntNamlen(dirent *syscall.Dirent) int {
const fixedHdr = uint16(unsafe.Offsetof(syscall.Dirent{}.Name))
limit := dirent.Reclen - fixedHdr
const dirNameLen = 256 // sizeof syscall.Dirent.Name
if limit > dirNameLen {
limit = dirNameLen
}
for i := uint16(0); i < limit; i++ {
if dirent.Name[i] == 0 {
return int(i)
}
}
panic("failed to find terminating 0 byte in dirent")
}
func parseDirEnt(dirent *syscall.Dirent, buf []byte) (consumed int, name []byte) {
// golang.org/issue/37269
copy(unsafe.Slice((*byte)(unsafe.Pointer(dirent)), unsafe.Sizeof(syscall.Dirent{})), buf)
if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v {
panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v))
}
if len(buf) < int(dirent.Reclen) {
panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen))
}
consumed = int(dirent.Reclen)
if dirent.Ino == 0 { // File absent in directory.
return
}
name = unsafe.Slice((*byte)(unsafe.Pointer(&dirent.Name[0])), direntNamlen(dirent))
return
}
var procSelfFDName = []byte("/proc/self/fd\x00")
func openProcSelfFD() (fd int, err error) {
for {
r0, _, e1 := syscall.Syscall(syscall.SYS_OPEN,
uintptr(unsafe.Pointer(&procSelfFDName[0])),
0, 0)
if e1 == 0 {
return int(r0), nil
}
if e1 == syscall.EINTR {
// Since https://golang.org/doc/go1.14#runtime we
// need to loop on EINTR on more places.
continue
}
return 0, syscall.Errno(e1)
}
}
func readDirent(fd int, buf []byte) (n int, err error) {
for {
nbuf, err := syscall.ReadDirent(fd, buf)
if err != syscall.EINTR {
return nbuf, err
}
}
}

View File

@ -5,6 +5,7 @@
package metrics
import (
"os"
"runtime"
"testing"
)
@ -13,10 +14,31 @@ func TestCurrentFileDescriptors(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skipf("skipping on %v", runtime.GOOS)
}
if n := CurrentFDs(); n < 3 {
t.Errorf("got %v; want >= 3", n)
} else {
t.Logf("got %v", n)
n := CurrentFDs()
if n < 3 {
t.Fatalf("got %v; want >= 3", n)
}
allocs := int(testing.AllocsPerRun(100, func() {
n = CurrentFDs()
}))
if allocs != 0 {
t.Errorf("allocs = %v; want 0", allocs)
}
// Open some FDs.
const extra = 10
for i := 0; i < extra; i++ {
f, err := os.Open("/proc/self/stat")
if err != nil {
t.Fatal(err)
}
defer f.Close()
}
n2 := CurrentFDs()
if n2 != n+extra {
t.Errorf("fds changed from %v => %v, want to %v", n, n2, n+extra)
}
}