derp/derphttp: don't block in LocalAddr method
The derphttp.Client mutex is held during connects (for up to 10 seconds) so this LocalAddr method (blocking on said mutex) could also block for up to 10 seconds, causing a pileup upstream in magicsock/wgengine and ultimately a watchdog timeout resulting in a crash. Updates #11519 Change-Id: Idd1d94ee00966be1b901f6899d8b9492f18add0f Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
e6983baa73
commit
b27238b654
|
@ -86,6 +86,7 @@ type Client struct {
|
|||
addrFamSelAtomic syncs.AtomicValue[AddressFamilySelector]
|
||||
|
||||
mu sync.Mutex
|
||||
atomicState syncs.AtomicValue[ConnectedState] // hold mu to write
|
||||
started bool // true upon first connect, never transitions to false
|
||||
preferred bool
|
||||
canAckPings bool
|
||||
|
@ -99,6 +100,14 @@ type Client struct {
|
|||
clock tstime.Clock
|
||||
}
|
||||
|
||||
// ConnectedState describes the state of a derphttp Client.
|
||||
type ConnectedState struct {
|
||||
Connected bool
|
||||
Connecting bool
|
||||
Closed bool
|
||||
LocalAddr netip.AddrPort // if Connected
|
||||
}
|
||||
|
||||
func (c *Client) String() string {
|
||||
return fmt.Sprintf("<derphttp_client.Client %s url=%s>", c.ServerPublicKey().ShortString(), c.url)
|
||||
}
|
||||
|
@ -307,6 +316,12 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
|
|||
if c.client != nil {
|
||||
return c.client, c.connGen, nil
|
||||
}
|
||||
c.atomicState.Store(ConnectedState{Connecting: true})
|
||||
defer func() {
|
||||
if err != nil {
|
||||
c.atomicState.Store(ConnectedState{Connecting: false})
|
||||
}
|
||||
}()
|
||||
|
||||
// timeout is the fallback maximum time (if ctx doesn't limit
|
||||
// it further) to do all of: DNS + TCP + TLS + HTTP Upgrade +
|
||||
|
@ -524,6 +539,12 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
|
|||
c.netConn = tcpConn
|
||||
c.tlsState = tlsState
|
||||
c.connGen++
|
||||
|
||||
localAddr, _ := c.client.LocalAddr()
|
||||
c.atomicState.Store(ConnectedState{
|
||||
Connected: true,
|
||||
LocalAddr: localAddr,
|
||||
})
|
||||
return c.client, c.connGen, nil
|
||||
}
|
||||
|
||||
|
@ -906,16 +927,15 @@ func (c *Client) SendPing(data [8]byte) error {
|
|||
// LocalAddr reports c's local TCP address, without any implicit
|
||||
// connect or reconnect.
|
||||
func (c *Client) LocalAddr() (netip.AddrPort, error) {
|
||||
c.mu.Lock()
|
||||
closed, client := c.closed, c.client
|
||||
c.mu.Unlock()
|
||||
if closed {
|
||||
st := c.atomicState.Load()
|
||||
if st.Closed {
|
||||
return netip.AddrPort{}, ErrClientClosed
|
||||
}
|
||||
if client == nil {
|
||||
la := st.LocalAddr
|
||||
if !st.Connected && !la.IsValid() {
|
||||
return netip.AddrPort{}, errors.New("client not connected")
|
||||
}
|
||||
return client.LocalAddr()
|
||||
return la, nil
|
||||
}
|
||||
|
||||
func (c *Client) ForwardPacket(from, to key.NodePublic, b []byte) error {
|
||||
|
@ -1049,6 +1069,7 @@ func (c *Client) Close() error {
|
|||
if c.netConn != nil {
|
||||
c.netConn.Close()
|
||||
}
|
||||
c.atomicState.Store(ConnectedState{Closed: true})
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
|
@ -447,3 +448,16 @@ func TestRunWatchConnectionLoopServeConnect(t *testing.T) {
|
|||
}
|
||||
watcher.RunWatchConnectionLoop(ctx, key.NodePublic{}, t.Logf, noopAdd, noopRemove)
|
||||
}
|
||||
|
||||
// verify that the LocalAddr method doesn't acquire the mutex.
|
||||
// See https://github.com/tailscale/tailscale/issues/11519
|
||||
func TestLocalAddrNoMutex(t *testing.T) {
|
||||
var c Client
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock() // not needed in test but for symmetry
|
||||
|
||||
_, err := c.LocalAddr()
|
||||
if got, want := fmt.Sprint(err), "client not connected"; got != want {
|
||||
t.Errorf("got error %q; want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue