log/sockstatlog: add delay before writing logs to disk
Split apart polling of sockstats and logging them to disk. Add a 3 second delay before writing logs to disk to prevent an infinite upload loop when uploading stats to logcatcher. Fixes #7719 Signed-off-by: Will Norris <will@tailscale.com>
This commit is contained in:
parent
985535aebc
commit
62a1e9a44f
|
@ -1,7 +1,9 @@
|
|||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package sockstatlog provides a logger for capturing and storing network socket stats.
|
||||
// Package sockstatlog provides a logger for capturing network socket stats for debugging.
|
||||
// Stats are collected at a frequency of 10 Hz and logged to disk.
|
||||
// Stats are only uploaded to the log server on demand.
|
||||
package sockstatlog
|
||||
|
||||
import (
|
||||
|
@ -25,19 +27,29 @@ import (
|
|||
"tailscale.com/util/mak"
|
||||
)
|
||||
|
||||
// pollPeriod specifies how often to poll for socket stats.
|
||||
const pollPeriod = time.Second / 10
|
||||
// pollInterval specifies how often to poll for socket stats.
|
||||
const pollInterval = time.Second / 10
|
||||
|
||||
// logInterval specifies how often to log sockstat events to disk.
|
||||
// This delay is added to prevent an infinite loop when logs are uploaded,
|
||||
// which itself creates additional sockstat events.
|
||||
const logInterval = 3 * time.Second
|
||||
|
||||
// Logger logs statistics about network sockets.
|
||||
type Logger struct {
|
||||
// enabled identifies whether the logger is enabled.
|
||||
enabled atomic.Bool
|
||||
|
||||
ctx context.Context
|
||||
cancelFn context.CancelFunc
|
||||
|
||||
ticker *time.Ticker
|
||||
logf logger.Logf
|
||||
// eventCh is used to pass events from the poller to the logger.
|
||||
eventCh chan event
|
||||
|
||||
logf logger.Logf
|
||||
|
||||
// logger is the underlying logtail logger than manages log files on disk
|
||||
// and uploading to the log server.
|
||||
logger *logtail.Logger
|
||||
filch *filch.Filch
|
||||
tr http.RoundTripper
|
||||
|
@ -73,6 +85,7 @@ func SockstatLogID(logID logid.PublicID) logid.PrivateID {
|
|||
// NewLogger returns a new Logger that will store stats in logdir.
|
||||
// On platforms that do not support sockstat logging, a nil Logger will be returned.
|
||||
// The returned Logger is not yet enabled, and must be shut down with Shutdown when it is no longer needed.
|
||||
// Logs will be uploaded to the log server using a new log ID derived from the provided backend logID.
|
||||
func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger, error) {
|
||||
if !sockstats.IsAvailable {
|
||||
return nil, nil
|
||||
|
@ -88,10 +101,9 @@ func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger,
|
|||
}
|
||||
|
||||
logger := &Logger{
|
||||
ticker: time.NewTicker(pollPeriod),
|
||||
logf: logf,
|
||||
filch: filch,
|
||||
tr: logpolicy.NewLogtailTransport(logtail.DefaultHost),
|
||||
logf: logf,
|
||||
filch: filch,
|
||||
tr: logpolicy.NewLogtailTransport(logtail.DefaultHost),
|
||||
}
|
||||
logger.logger = logtail.NewLogger(logtail.Config{
|
||||
BaseURL: logpolicy.LogURL(),
|
||||
|
@ -124,8 +136,14 @@ func (l *Logger) SetLoggingEnabled(v bool) {
|
|||
old := l.enabled.Load()
|
||||
if old != v && l.enabled.CompareAndSwap(old, v) {
|
||||
if v {
|
||||
if l.eventCh == nil {
|
||||
// eventCh should be large enough for the number of events that will occur within logInterval.
|
||||
// Add an extra second's worth of events to ensure we don't drop any.
|
||||
l.eventCh = make(chan event, (logInterval+time.Second)/pollInterval)
|
||||
}
|
||||
l.ctx, l.cancelFn = context.WithCancel(context.Background())
|
||||
go l.poll()
|
||||
go l.logEvents()
|
||||
} else {
|
||||
l.cancelFn()
|
||||
}
|
||||
|
@ -137,19 +155,21 @@ func (l *Logger) Write(p []byte) (int, error) {
|
|||
}
|
||||
|
||||
// poll fetches the current socket stats at the configured time interval,
|
||||
// calculates the delta since the last poll, and logs any non-zero values.
|
||||
// calculates the delta since the last poll,
|
||||
// and writes any non-zero values to the logger event channel.
|
||||
// This method does not return.
|
||||
func (l *Logger) poll() {
|
||||
// last is the last set of socket stats we saw.
|
||||
var lastStats *sockstats.SockStats
|
||||
var lastTime time.Time
|
||||
|
||||
enc := json.NewEncoder(l)
|
||||
ticker := time.NewTicker(pollInterval)
|
||||
for {
|
||||
select {
|
||||
case <-l.ctx.Done():
|
||||
ticker.Stop()
|
||||
return
|
||||
case t := <-l.ticker.C:
|
||||
case t := <-ticker.C:
|
||||
stats := sockstats.Get()
|
||||
if lastStats != nil {
|
||||
diffstats := delta(lastStats, stats)
|
||||
|
@ -162,9 +182,7 @@ func (l *Logger) poll() {
|
|||
if stats.CurrentInterfaceCellular {
|
||||
e.IsCellularInterface = 1
|
||||
}
|
||||
if err := enc.Encode(e); err != nil {
|
||||
l.logf("sockstatlog: error encoding log: %v", err)
|
||||
}
|
||||
l.eventCh <- e
|
||||
}
|
||||
}
|
||||
lastTime = t
|
||||
|
@ -173,6 +191,34 @@ func (l *Logger) poll() {
|
|||
}
|
||||
}
|
||||
|
||||
// logEvents reads events from the event channel at logInterval and logs them to disk.
|
||||
// This method does not return.
|
||||
func (l *Logger) logEvents() {
|
||||
enc := json.NewEncoder(l)
|
||||
flush := func() {
|
||||
for {
|
||||
select {
|
||||
case e := <-l.eventCh:
|
||||
if err := enc.Encode(e); err != nil {
|
||||
l.logf("sockstatlog: error encoding log: %v", err)
|
||||
}
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
ticker := time.NewTicker(logInterval)
|
||||
for {
|
||||
select {
|
||||
case <-l.ctx.Done():
|
||||
ticker.Stop()
|
||||
return
|
||||
case <-ticker.C:
|
||||
flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Logger) LogID() string {
|
||||
if l.logger == nil {
|
||||
return ""
|
||||
|
@ -189,7 +235,6 @@ func (l *Logger) Shutdown() {
|
|||
if l.cancelFn != nil {
|
||||
l.cancelFn()
|
||||
}
|
||||
l.ticker.Stop()
|
||||
l.filch.Close()
|
||||
l.logger.Shutdown(context.Background())
|
||||
|
||||
|
|
|
@ -150,6 +150,7 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger {
|
|||
timeNow: cfg.TimeNow,
|
||||
bo: backoff.NewBackoff("logtail", stdLogf, 30*time.Second),
|
||||
metricsDelta: cfg.MetricsDelta,
|
||||
sockstatsLabel: sockstats.LabelLogtailLogger,
|
||||
|
||||
procID: procID,
|
||||
includeProcSequence: cfg.IncludeProcSequence,
|
||||
|
@ -192,6 +193,7 @@ type Logger struct {
|
|||
metricsDelta func() string // or nil
|
||||
privateID logid.PrivateID
|
||||
httpDoCalls atomic.Int32
|
||||
sockstatsLabel sockstats.Label
|
||||
|
||||
procID uint32
|
||||
includeProcSequence bool
|
||||
|
@ -220,6 +222,11 @@ func (l *Logger) SetLinkMonitor(lm *monitor.Mon) {
|
|||
l.linkMonitor = lm
|
||||
}
|
||||
|
||||
// SetSockstatsLabel sets the label used in sockstat logs to identify network traffic from this logger.
|
||||
func (l *Logger) SetSockstatsLabel(label sockstats.Label) {
|
||||
l.sockstatsLabel = label
|
||||
}
|
||||
|
||||
// PrivateID returns the logger's private log ID.
|
||||
//
|
||||
// It exists for internal use only.
|
||||
|
@ -428,7 +435,7 @@ func (l *Logger) awaitInternetUp(ctx context.Context) {
|
|||
// origlen of -1 indicates that the body is not compressed.
|
||||
func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) {
|
||||
const maxUploadTime = 45 * time.Second
|
||||
ctx = sockstats.WithSockStats(ctx, sockstats.LabelLogtailLogger)
|
||||
ctx = sockstats.WithSockStats(ctx, l.sockstatsLabel)
|
||||
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
|
||||
defer cancel()
|
||||
|
||||
|
|
Loading…
Reference in New Issue