Skip to content

Skip unnecessary disconnect and wait for disconnect to finish in shutdown #10031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/release-notes/release-notes-0.20.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ reader of a payment request.
- [Refactored](https://github.com/lightningnetwork/lnd/pull/10018) `channelLink`
to improve readability and maintainability of the code.

- Remove unnecessary
[disconnect](https://github.com/lightningnetwork/lnd/pull/10031) and properly
wait for goroutine to finish when shutting down.

## Breaking Changes
## Performance Improvements

Expand Down
49 changes: 39 additions & 10 deletions peer/brontide.go
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,9 @@ func NewBrontide(cfg Config) *Brontide {
// Start starts all helper goroutines the peer needs for normal operations. In
// the case this peer has already been started, then this function is a noop.
func (p *Brontide) Start() error {
if atomic.AddInt32(&p.started, 1) != 1 {
if !atomic.CompareAndSwapInt32(&p.started, 0, 1) {
p.log.Warn("already started")

return nil
}

Expand Down Expand Up @@ -1579,17 +1581,29 @@ func (p *Brontide) maybeSendChannelUpdates() {
// calling Disconnect will signal the quit channel and the method will not
// block, since no goroutines were spawned.
func (p *Brontide) WaitForDisconnect(ready chan struct{}) {
p.log.Trace("waiting for disconnect")
defer p.log.Trace("peer disconnected")

// Before we try to call the `Wait` goroutine, we'll make sure the main
// set of goroutines are already active.
select {
case <-p.startReady:
p.log.Trace("startReady received, waiting for signal ready")

case <-p.cg.Done():
p.log.Trace("peer quit, exit waiting for signal startReady")

return
}

select {
case <-ready:
p.log.Trace("ready received, waiting goroutines to finish")

case <-p.cg.Done():
p.log.Trace("peer quit, exit waiting for signal ready")

return
}

p.cg.WgWait()
Expand All @@ -1604,6 +1618,9 @@ func (p *Brontide) WaitForDisconnect(ready chan struct{}) {
// the peer has finished starting up before calling this method.
func (p *Brontide) Disconnect(reason error) {
if !atomic.CompareAndSwapInt32(&p.disconnect, 0, 1) {
p.log.Warnf("got disconnect reason [%v], but peer already "+
"disconnected", reason)

return
}

Expand All @@ -1614,11 +1631,13 @@ func (p *Brontide) Disconnect(reason error) {
// started, otherwise we will skip reading it as this chan won't be
// closed, hence blocks forever.
if atomic.LoadInt32(&p.started) == 1 {
p.log.Debugf("Peer hasn't finished starting up yet, waiting " +
"on startReady signal before closing connection")
p.log.Debug("waiting on startReady signal before closing " +
"connection")

select {
case <-p.startReady:
p.log.Debug("startReady received")

case <-p.cg.Done():
return
}
Expand Down Expand Up @@ -2047,6 +2066,9 @@ func (p *Brontide) readHandler() {
// gossiper?
p.initGossipSync()

// exitErr is the error to be used when disconnect the peer.
var exitErr error

discStream := newDiscMsgStream(p)
discStream.Start()
defer discStream.Stop()
Expand All @@ -2060,7 +2082,8 @@ out:
}
}
if err != nil {
p.log.Infof("unable to read message from peer: %v", err)
p.log.Debugf("unable to read message from peer: %v",
err)

// If we could not read our peer's message due to an
// unknown type or invalid alias, we continue processing
Expand Down Expand Up @@ -2098,6 +2121,7 @@ out:
// didn't recognize, then we'll stop all processing as
// this is a fatal error.
default:
exitErr = err
break out
}
}
Expand Down Expand Up @@ -2182,8 +2206,7 @@ out:
err := p.resendChanSyncMsg(targetChan)
if err != nil {
// TODO(halseth): send error to peer?
p.log.Errorf("resend failed: %v",
err)
p.log.Errorf("resend failed: %v", err)
}
}

Expand Down Expand Up @@ -2242,9 +2265,13 @@ out:
idleTimer.Reset(idleTimeout)
}

p.Disconnect(errors.New("read handler closed"))
// Disconnect the peer on exitErr, but only if the peer hasn't been
// disconnected before.
if atomic.LoadInt32(&p.disconnect) == 0 {
p.Disconnect(exitErr)
}

p.log.Trace("readHandler for peer done")
p.log.Debugf("peer quit, exit readHandler")
}

// handleCustomMessage handles the given custom message if a handler is
Expand Down Expand Up @@ -2729,7 +2756,7 @@ out:
}

case <-p.cg.Done():
exitErr = lnpeer.ErrPeerExiting
p.log.Debug("peer quit, exit writeHandler")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why don't we want to record the peerExiting error anymore ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here we already know the peer is quitting, which means it has been disconnected, so we don't use this error and call disconnect again below.

break out
}
}
Expand All @@ -2738,7 +2765,9 @@ out:
// disconnect.
p.cg.WgDone()

p.Disconnect(exitErr)
if exitErr != nil {
p.Disconnect(exitErr)
}

p.log.Trace("writeHandler for peer done")
}
Expand Down
7 changes: 6 additions & 1 deletion server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5209,7 +5209,12 @@ func (s *server) DisconnectPeer(pubKey *btcec.PublicKey) error {
//
// NOTE: We call it in a goroutine to avoid blocking the main server
// goroutine because we might hold the server's mutex.
go peer.Disconnect(fmt.Errorf("server: DisconnectPeer called"))
s.wg.Add(1)
go func() {
defer s.wg.Done()

peer.Disconnect(fmt.Errorf("server: DisconnectPeer called"))
}()

return nil
}
Expand Down
Loading