fn: fnlb: enhancements and new grouper tests (#493)

* fn: fnlb: enhancements and new grouper tests

*) added healthy threshold (default: 1)
*) grouper is now using configured hcEndpoint for version checks
*) grouper now logs when servers switch between healthy/unhealthy status
*) moved DB code out of grouper
*) run health check immediately at start (don't wait until hcInterval)
*) optional shutdown timeout (default: 0) & mgmt port (default: 8081)
*) hot path List() in grouper now uses atomic ptr Load
*) consistent router: moved closure to a new function
*) bugfix: version parsing from fn servers should not panic fnlb
*) bugfix: servers removed from DB, stayed in healthy list
*) bugfix: if DB is down, health checker stopped monitoring
*) basic new tests for grouper (add/rm/unhealthy/healthy) server
This commit is contained in:
Tolga Ceylan
2017-11-16 11:35:30 -08:00
committed by GitHub
parent 910612d0b1
commit 657afd5838
7 changed files with 937 additions and 290 deletions

View File

@@ -27,9 +27,12 @@ func main() {
var conf lb.Config
flag.StringVar(&conf.DBurl, "db", "sqlite3://:memory:", "backend to store nodes, default to in memory")
flag.StringVar(&conf.Listen, "listen", ":8081", "port to run on")
flag.StringVar(&conf.MgmtListen, "mgmt-listen", ":8081", "management port to run on")
flag.IntVar(&conf.ShutdownTimeout, "shutdown-timeout", 0, "graceful shutdown timeout")
flag.IntVar(&conf.HealthcheckInterval, "hc-interval", 3, "how often to check f(x) nodes, in seconds")
flag.StringVar(&conf.HealthcheckEndpoint, "hc-path", "/version", "endpoint to determine node health")
flag.IntVar(&conf.HealthcheckUnhealthy, "hc-unhealthy", 2, "threshold of failed checks to declare node unhealthy")
flag.IntVar(&conf.HealthcheckHealthy, "hc-healthy", 1, "threshold of success checks to declare node healthy")
flag.IntVar(&conf.HealthcheckTimeout, "hc-timeout", 5, "timeout of healthcheck endpoint, in seconds")
flag.StringVar(&conf.ZipkinURL, "zipkin", "", "zipkin endpoint to send traces")
flag.Parse()
@@ -54,7 +57,12 @@ func main() {
},
}
g, err := lb.NewAllGrouper(conf)
db, err := lb.NewDB(conf)
if err != nil {
logrus.WithError(err).Fatal("error setting up database")
}
g, err := lb.NewAllGrouper(conf, db)
if err != nil {
logrus.WithError(err).Fatal("error setting up grouper")
}
@@ -64,27 +72,57 @@ func main() {
return r.URL.Path, nil
}
h := lb.NewProxy(k, g, r, conf)
h = g.Wrap(h) // add/del/list endpoints
h = r.Wrap(h) // stats / dash endpoint
servers := make([]*http.Server, 0, 1)
handler := lb.NewProxy(k, g, r, conf)
err = serve(conf.Listen, h)
if err != nil {
logrus.WithError(err).Fatal("server error")
// a separate mgmt listener is requested? then let's create a LB traffic only server
if conf.Listen != conf.MgmtListen {
servers = append(servers, &http.Server{Addr: conf.Listen, Handler: handler})
handler = lb.NullHandler()
}
// add mgmt endpoints to the handler
handler = g.Wrap(handler) // add/del/list endpoints
handler = r.Wrap(handler) // stats / dash endpoint
servers = append(servers, &http.Server{Addr: conf.MgmtListen, Handler: handler})
serve(servers, &conf)
}
func serve(addr string, handler http.Handler) error {
server := &http.Server{Addr: addr, Handler: handler}
func serve(servers []*http.Server, conf *lb.Config) {
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGQUIT, syscall.SIGINT)
go func() {
for sig := range ch {
logrus.WithFields(logrus.Fields{"signal": sig}).Info("received signal")
server.Shutdown(context.Background()) // safe shutdown
return
for i := 0; i < len(servers); i++ {
go func(idx int) {
err := servers[idx].ListenAndServe()
if err != nil && err != http.ErrServerClosed {
logrus.WithFields(logrus.Fields{"server_id": idx}).WithError(err).Fatal("server error")
} else {
logrus.WithFields(logrus.Fields{"server_id": idx}).Info("server stopped")
}
}(i)
}
sig := <-ch
logrus.WithFields(logrus.Fields{"signal": sig}).Info("received signal")
for i := 0; i < len(servers); i++ {
ctx := context.Background()
if conf.ShutdownTimeout > 0 {
tmpCtx, cancel := context.WithTimeout(context.Background(), time.Duration(conf.ShutdownTimeout)*time.Second)
ctx = tmpCtx
defer cancel()
}
}()
return server.ListenAndServe()
err := servers[i].Shutdown(ctx) // safe shutdown
if err != nil {
logrus.WithFields(logrus.Fields{"server_id": i}).WithError(err).Fatal("server shutdown error")
} else {
logrus.WithFields(logrus.Fields{"server_id": i}).Info("server shutdown")
}
}
}