Last active
July 7, 2023 06:53
-
-
Save flotter/483d233d38f0961a52f7ef7405887012 to your computer and use it in GitHub Desktop.
Pebble daemon server shutdown race
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The race symptom you are looking out for is: | |
// HTTP server Shutdown: close tcp [::]:<port>: use of closed network connection | |
// | |
// in order or find this, run: | |
// while true ; do ./go-listener-race | grep Shutdown ; done | |
// | |
// The race is between the Serve() thread returning after the listener accept() was | |
// terminated with a direct listener close(), and the server Shutdown() command running | |
// through another iteration of closing listeners. Normally this is not a problem because | |
// the Close() is wrapped in a once.Once(), but since the port is already closed, neither | |
// can succeed. However, the Serve() deferred Close() discards its error, so if the | |
// Serve() win, all looks fine. If the Shutdown() win getting to the close first, an | |
// error (as above) is recorded. | |
package main | |
import ( | |
"context" | |
"fmt" | |
"gopkg.in/tomb.v2" | |
"net" | |
"net/http" | |
"time" | |
) | |
const lnCount = 10 | |
var ( | |
srv http.Server | |
t tomb.Tomb | |
) | |
func main() { | |
lns := make([]net.Listener, lnCount) | |
for c, _ := range lns { | |
l, err := net.Listen("tcp", fmt.Sprintf(":%d", 2000+c)) | |
if err != nil { | |
fmt.Printf("Listen failed: %v\n", err) | |
} | |
lns[c] = l | |
} | |
defer func() { | |
for _, l := range lns { | |
l.Close() | |
} | |
}() | |
for c, l := range lns { | |
fmt.Printf("L %d => Port %#v\n", c, l.Addr().(*net.TCPAddr).Port) | |
} | |
for c, _ := range lns { | |
func() { | |
defer func(n net.Listener) { | |
t.Go(func() (err error) { | |
fmt.Printf("Entering serve (port %#v)\n", n.Addr().(*net.TCPAddr).Port) | |
if err := srv.Serve(n); err != nil { | |
// Error starting or closing listener: | |
fmt.Printf("HTTP Serve error: %v\n", err) | |
} | |
fmt.Printf("Existing serve (port %#v)\n", n.Addr().(*net.TCPAddr).Port) | |
return err | |
}) | |
}(lns[c]) | |
}() | |
} | |
time.Sleep(time.Millisecond * 50) | |
fmt.Printf("Closing listeners ...\n") | |
// Race condition trigger: listeners are not thread safe in the same way that the HTTP server is. Furthermore, | |
// the HTTP server is managing the lifecycle of each of its listeners, with code for deferred closure | |
// inside Serve(), but also srv.Close() and srv.Shutdown() deals with purposeful terminations. | |
for _, l := range lns { | |
l.Close() | |
} | |
// The actual race is between the Serve() defer closing the listener at the same time that Shutdown() is closing the | |
// listener. This race in itself is gaurded by sync.Once(), but the issue is if Serve() wins to Close(), no error is | |
// returned (its ignored) while if Shutdown() wins, an error is captured. | |
if err := srv.Shutdown(context.Background()); err != nil { | |
// Error from closing listeners, or context timeout: | |
fmt.Printf("HTTP server Shutdown: %v", err) | |
} | |
t.Wait() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment