lb round 2

this rewrites the load balancer that was reported as problematic in
https://github.com/iron-io/functions/issues/570, which also has a test suite
located at test/fnlb-test-harness -- that test is now 'passing' in the sense
that when running multiple functions nodes behind 1 load balancer, every call
to a given route goes to the same functions node. yay. this uses a different
consistent hashing algorithm and throws out all of the fallback code (all of
the old lb code, really).

this is basically an mvp and there is work left to do: running multiple load
balancers, allowing functions to run on more nodes as load increases, and some
basic features like http/2 -- it also needs some love to be really robust;
most of this is noted in TODOs in the file.

this does basic health checking, configurable in the same way as aws elb. I
think we could probably do gossip eventually, but this works as a first cut.
after N health checks fail, requests start going to a different node; until
that threshold is reached, requests routed to the failing node will keep
failing (need to fix). this continues to use consistent hashing, which is
great in that we don't need to store anything, and we may be able to stick
with that strategy in the future with some care.
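
for reference, the per-request routing decision in the new lb.go below boils
down to roughly this (a sketch only; the node addresses are made-up examples):

    nodes := []string{"10.0.0.1:8080", "10.0.0.2:8080", "10.0.0.3:8080"} // kept sorted on every lb
    sum64 := siphash.Hash(0, 0x4c617279426f6174, []byte(req.URL.Path))   // hash the request path
    target := nodes[jumpConsistentHash(sum64, int32(len(nodes)))]        // stable pick, no stored state
    // same path -> same hash -> same node, as long as every lb sees the same sorted node list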
Reed Allman
2017-05-16 13:36:05 -07:00
parent ab769ebe4a
commit 730e8f9c68
6 changed files with 427 additions and 232 deletions

Gopkg.lock (generated): 24 lines changed

@@ -1,4 +1,4 @@
-memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
+memo = "01114332a6ce5b71583264fdc63cf899f4ef12030a6febe795a2584189675c23"

[[projects]]
  branch = "master"

@@ -90,6 +90,12 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  revision = "8ab6407b697782a06568d4b7f1db25550ec2e4c6"
  version = "v0.2.0"

+[[projects]]
+  name = "github.com/dchest/siphash"
+  packages = ["."]
+  revision = "42ba037e748c9062a75e0924705c43b893edefcd"
+  version = "v1.0.0"
+
[[projects]]
  branch = "master"
  name = "github.com/dghubble/go-twitter"

@@ -121,10 +127,10 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  version = "v2.6.1"

[[projects]]
-  branch = "master"
-  name = "github.com/moby/moby"
-  packages = ["api/types","api/types/blkiodev","api/types/container","api/types/filters","api/types/mount","api/types/network","api/types/registry","api/types/strslice","api/types/swarm","api/types/versions","cli/config/configfile","opts","pkg/archive","pkg/fileutils","pkg/homedir","pkg/idtools","pkg/ioutils","pkg/jsonlog","pkg/jsonmessage","pkg/longpath","pkg/pools","pkg/promise","pkg/stdcopy","pkg/system","pkg/term","pkg/term/windows"]
-  revision = "a9ff628a3c7c55cf016af88f37c4fb6a6029e17a"
+  name = "github.com/docker/docker"
+  packages = ["api/types","api/types/blkiodev","api/types/container","api/types/filters","api/types/mount","api/types/network","api/types/registry","api/types/strslice","api/types/swarm","api/types/versions","opts","pkg/archive","pkg/fileutils","pkg/homedir","pkg/idtools","pkg/ioutils","pkg/jsonlog","pkg/jsonmessage","pkg/longpath","pkg/pools","pkg/promise","pkg/stdcopy","pkg/system","pkg/term","pkg/term/windows"]
+  revision = "092cba3727bb9b4a2f0e922cd6c0f93ea270e363"
+  version = "v1.13.1"

[[projects]]
  name = "github.com/docker/go-connections"

@@ -255,7 +261,7 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
[[projects]]
  branch = "master"
  name = "github.com/golang/groupcache"
-  packages = ["consistenthash","singleflight"]
+  packages = ["singleflight"]
  revision = "72d04f9fcdec7d3821820cc4a6f150eae553639a"

[[projects]]

@@ -372,6 +378,12 @@ memo = "a3207ce7cdc29bfe86acf8832a25e44560f834ea1adff813e43c96de7be53eba"
  packages = ["."]
  revision = "53818660ed4955e899c0bcafa97299a388bd7c8e"

+[[projects]]
+  branch = "master"
+  name = "github.com/moby/moby"
+  packages = ["cli/config/configfile","pkg/jsonmessage"]
+  revision = "a9ff628a3c7c55cf016af88f37c4fb6a6029e17a"
+
[[projects]]
  name = "github.com/opencontainers/runc"
  packages = ["libcontainer/system","libcontainer/user"]

(deleted file)

@@ -1,3 +0,0 @@
all:
	go build -o fnlb
	./fnlb --help

(deleted file)

@@ -1,34 +0,0 @@
package main

import (
    "context"
    "flag"
    "fmt"
    "net/http"
    "os"
    "strings"

    "gitlab.oracledx.com/odx/functions/lb"
)

var (
    fnodes  string
    flisten string
)

func init() {
    flag.StringVar(&fnodes, "nodes", "127.0.0.1:8080", "comma separated list of Oracle Functions nodes")
    flag.StringVar(&flisten, "listen", "0.0.0.0:8081", "listening port for incoming connections")
    flag.Parse()
}

func main() {
    nodes := strings.Split(fnodes, ",")
    p := lb.ConsistentHashReverseProxy(context.Background(), nodes)
    fmt.Println("forwarding calls to", nodes)
    fmt.Println("listening to", flisten)
    if err := http.ListenAndServe(flisten, p); err != nil {
        fmt.Fprintln(os.Stderr, "could not start server. error:", err)
        os.Exit(1)
    }
}

lb/lb.go: 369 lines changed

@@ -1,52 +1,357 @@
-package lb
-
-import (
-    "bytes"
-    "context"
-    "net/http"
-    "net/http/httputil"
-    "strconv"
-    "sync"
-    "sync/atomic"
-
-    "github.com/golang/groupcache/consistenthash"
-)
-
-// ConsistentHashReverseProxy returns a new ReverseProxy that routes
-// URLs to the scheme, host, and base path provided in by a consistent hash
-// algorithm. If the target's path is "/base" and the incoming request was for
-// "/dir", the target request will be for /base/dir.
-// ConsistentHashReverseProxy does not rewrite the Host header.
-func ConsistentHashReverseProxy(ctx context.Context, nodes []string) *httputil.ReverseProxy {
-    ch := consistenthash.New(len(nodes), nil)
-    ch.Add(nodes...)
-
-    bufPool := sync.Pool{
-        New: func() interface{} {
-            return new(bytes.Buffer)
-        },
-    }
-
-    var i int64
-    director := func(req *http.Request) {
-        buf := bufPool.Get().(*bytes.Buffer)
-        defer bufPool.Put(buf)
-        buf.Reset()
-        buf.WriteString(req.URL.Path)
-        buf.WriteString("??")
-        b := strconv.AppendInt(buf.Bytes(), atomic.AddInt64(&i, 1), 10)
-        target := ch.Get(string(b))
-        req.URL.Scheme = "http"
-        req.URL.Host = target
-        if _, ok := req.Header["User-Agent"]; !ok {
-            // explicitly disable User-Agent so it's not set to default value
-            req.Header.Set("User-Agent", "")
-        }
-    }
-
-    return &httputil.ReverseProxy{
-        Director:  director,
-        Transport: NewRoundTripper(ctx, nodes),
-    }
-}
+package main
+
+import (
+    "context"
+    "encoding/json"
+    "flag"
+    "fmt"
+    "io"
+    "io/ioutil"
+    "net/http"
+    "net/http/httputil"
+    "sort"
+    "strings"
+    "sync"
+    "time"
+
+    "github.com/Sirupsen/logrus"
+    "github.com/dchest/siphash"
+)
+
+// TODO: consistent hashing is nice to get a cheap way to place nodes but it
+// doesn't account well for certain functions that may be 'hotter' than others.
+// we should very likely keep a load ordered list and distribute based on that.
+// if we can get some kind of feedback from the f(x) nodes, we can use that.
+// maybe it's good enough to just ch(x) + 1 if ch(x) is marked as "hot"?
+
+// TODO the load balancers all need to have the same list of nodes. gossip?
+// also gossip would handle failure detection instead of elb style
+
+// TODO when adding nodes we should health check them once before adding them
+// TODO when node goes offline should try to redirect request instead of 5xxing
+// TODO config
+// TODO TLS
+
+func main() {
+    // XXX (reed): normalize
+    fnodes := flag.String("nodes", "", "comma separated list of IronFunction nodes")
+
+    var conf config
+    flag.IntVar(&conf.Port, "port", 8081, "port to run on")
+    flag.IntVar(&conf.HealthcheckInterval, "hc-interval", 3, "how often to check f(x) nodes, in seconds")
+    flag.StringVar(&conf.HealthcheckEndpoint, "hc-path", "/version", "endpoint to determine node health")
+    flag.IntVar(&conf.HealthcheckUnhealthy, "hc-unhealthy", 2, "threshold of failed checks to declare node unhealthy")
+    flag.IntVar(&conf.HealthcheckTimeout, "hc-timeout", 5, "timeout of healthcheck endpoint, in seconds")
+    flag.Parse()
+
+    conf.Nodes = strings.Split(*fnodes, ",")
+
+    ch := newProxy(conf)
+
+    // XXX (reed): safe shutdown
+    fmt.Println(http.ListenAndServe(":8081", ch))
+}
+
+type config struct {
+    Port                 int      `json:"port"`
+    Nodes                []string `json:"nodes"`
+    HealthcheckInterval  int      `json:"healthcheck_interval"`
+    HealthcheckEndpoint  string   `json:"healthcheck_endpoint"`
+    HealthcheckUnhealthy int      `json:"healthcheck_unhealthy"`
+    HealthcheckTimeout   int      `json:"healthcheck_timeout"`
+}
+
+type chProxy struct {
+    ch consistentHash
+
+    sync.RWMutex
+    // TODO map[string][]time.Time
+    ded map[string]int64
+
+    hcInterval  time.Duration
+    hcEndpoint  string
+    hcUnhealthy int64
+    hcTimeout   time.Duration
+
+    proxy *httputil.ReverseProxy
+}
+
+func newProxy(conf config) *chProxy {
+    ch := &chProxy{
+        ded: make(map[string]int64),
+
+        // XXX (reed): need to be reconfigurable at some point
+        hcInterval:  time.Duration(conf.HealthcheckInterval) * time.Second,
+        hcEndpoint:  conf.HealthcheckEndpoint,
+        hcUnhealthy: int64(conf.HealthcheckUnhealthy),
+        hcTimeout:   time.Duration(conf.HealthcheckTimeout) * time.Second,
+    }
+
+    director := func(req *http.Request) {
+        target := ch.ch.get(req.URL.Path)
+        req.URL.Scheme = "http" // XXX (reed): h2 support
+        req.URL.Host = target
+    }
+
+    ch.proxy = &httputil.ReverseProxy{
+        // XXX (reed): optimized http client
+        // XXX (reed): buffer pool
+        Director: director,
+    }
+
+    for _, n := range conf.Nodes {
+        // XXX (reed): need to health check these
+        ch.ch.add(n)
+    }
+
+    go ch.healthcheck()
+    return ch
+}
+
+func (ch *chProxy) healthcheck() {
+    for range time.Tick(ch.hcInterval) {
+        nodes := ch.ch.list()
+        nodes = append(nodes, ch.dead()...)
+
+        // XXX (reed): need to figure out elegant adding / removing better
+        for _, n := range nodes {
+            go ch.ping(n)
+        }
+    }
+}
+
+func (ch *chProxy) ping(node string) {
+    req, _ := http.NewRequest("GET", "http://"+node+ch.hcEndpoint, nil)
+    ctx, cancel := context.WithTimeout(context.Background(), ch.hcTimeout)
+    defer cancel()
+    req = req.WithContext(ctx)
+
+    // XXX (reed): use same transport as proxy is using
+    resp, err := http.DefaultClient.Do(req)
+    if resp != nil && resp.Body != nil {
+        io.Copy(ioutil.Discard, resp.Body)
+        resp.Body.Close()
+    }
+
+    if err != nil || resp.StatusCode < 200 || resp.StatusCode > 299 {
+        logrus.WithFields(logrus.Fields{"node": node}).Error("health check failed")
+        ch.fail(node)
+    } else {
+        ch.alive(node)
+    }
+}
+
+func (ch *chProxy) fail(node string) {
+    // shouldn't be a hot path so shouldn't be too contended on since health
+    // checks are infrequent
+    ch.Lock()
+    ch.ded[node]++
+    failed := ch.ded[node]
+    ch.Unlock()
+
+    if failed >= ch.hcUnhealthy {
+        ch.ch.remove(node) // TODO under lock?
+    }
+}
+
+func (ch *chProxy) alive(node string) {
+    ch.RLock()
+    _, ok := ch.ded[node]
+    ch.RUnlock()
+    if ok {
+        ch.Lock()
+        delete(ch.ded, node)
+        ch.Unlock()
+        ch.ch.add(node) // TODO under lock?
+    }
+}
+
+func (ch *chProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+    if r.URL.Path == "/1/lb/nodes" {
+        switch r.Method {
+        case "PUT":
+            // XXX (reed): addNode
+            ch.addNode(w, r)
+            return
+        case "DELETE":
+            // XXX (reed): removeNode?
+            ch.removeNode(w, r)
+            return
+        case "GET":
+            ch.listNodes(w, r)
+            return
+        }
+
+        // XXX (reed): stats?
+        // XXX (reed): probably do these on a separate port to avoid conflicts
+    }
+
+    ch.proxy.ServeHTTP(w, r)
+}
+
+func (ch *chProxy) addNode(w http.ResponseWriter, r *http.Request) {
+    var bod struct {
+        Node string `json:"node"`
+    }
+    err := json.NewDecoder(r.Body).Decode(&bod)
+    if err != nil {
+        sendError(w, http.StatusBadRequest, err.Error())
+        return
+    }
+
+    ch.ch.add(bod.Node)
+    sendSuccess(w, "node added")
+}
+
+func (ch *chProxy) removeNode(w http.ResponseWriter, r *http.Request) {
+    var bod struct {
+        Node string `json:"node"`
+    }
+    err := json.NewDecoder(r.Body).Decode(&bod)
+    if err != nil {
+        sendError(w, http.StatusBadRequest, err.Error())
+        return
+    }
+
+    ch.ch.remove(bod.Node)
+    sendSuccess(w, "node deleted")
+}
+
+func (ch *chProxy) listNodes(w http.ResponseWriter, r *http.Request) {
+    nodes := ch.ch.list()
+    dead := ch.dead()
+
+    out := make(map[string]string, len(nodes)+len(dead))
+    for _, n := range nodes {
+        if ch.isDead(n) {
+            out[n] = "offline"
+        } else {
+            out[n] = "online"
+        }
+    }
+    for _, n := range dead {
+        out[n] = "offline"
+    }
+
+    sendValue(w, struct {
+        Nodes map[string]string `json:"nodes"`
+    }{
+        Nodes: out,
+    })
+}
+
+func (ch *chProxy) isDead(node string) bool {
+    ch.RLock()
+    val, ok := ch.ded[node]
+    ch.RUnlock()
+    return ok && val >= ch.hcUnhealthy
+}
+
+func (ch *chProxy) dead() []string {
+    ch.RLock()
+    defer ch.RUnlock()
+    nodes := make([]string, 0, len(ch.ded))
+    for n, val := range ch.ded {
+        if val >= ch.hcUnhealthy {
+            nodes = append(nodes, n)
+        }
+    }
+    return nodes
+}
+
+func sendValue(w http.ResponseWriter, v interface{}) {
+    err := json.NewEncoder(w).Encode(v)
+    if err != nil {
+        logrus.WithError(err).Error("error writing response response")
+    }
+}
+
+func sendSuccess(w http.ResponseWriter, msg string) {
+    err := json.NewEncoder(w).Encode(struct {
+        Msg string `json:"msg"`
+    }{
+        Msg: msg,
+    })
+    if err != nil {
+        logrus.WithError(err).Error("error writing response response")
+    }
+}
+
+func sendError(w http.ResponseWriter, code int, msg string) {
+    w.WriteHeader(code)
+    err := json.NewEncoder(w).Encode(struct {
+        Msg string `json:"msg"`
+    }{
+        Msg: msg,
+    })
+    if err != nil {
+        logrus.WithError(err).Error("error writing response response")
+    }
+}
+
+// consistentHash will maintain a list of strings which can be accessed by
+// keying them with a separate group of strings
+type consistentHash struct {
+    // protects nodes
+    sync.RWMutex
+    nodes []string
+}
+
+func (ch *consistentHash) add(newb string) {
+    ch.Lock()
+    defer ch.Unlock()
+
+    // filter dupes, under lock. sorted, so binary search
+    i := sort.SearchStrings(ch.nodes, newb)
+    if i < len(ch.nodes) && ch.nodes[i] == newb {
+        return
+    }
+    ch.nodes = append(ch.nodes, newb)
+
+    // need to keep in sorted order so that hash index works across nodes
+    sort.Sort(sort.StringSlice(ch.nodes))
+}
+
+func (ch *consistentHash) remove(ded string) {
+    ch.Lock()
+    i := sort.SearchStrings(ch.nodes, ded)
+    if i < len(ch.nodes) && ch.nodes[i] == ded {
+        ch.nodes = append(ch.nodes[:i], ch.nodes[i+1:]...)
+    }
+    ch.Unlock()
+}
+
+// return a copy
+func (ch *consistentHash) list() []string {
+    ch.RLock()
+    ret := make([]string, len(ch.nodes))
+    copy(ret, ch.nodes)
+    ch.RUnlock()
+    return ret
+}
+
+func (ch *consistentHash) get(key string) string {
+    // crc not unique enough & sha is too slow, it's 1 import
+    sum64 := siphash.Hash(0, 0x4c617279426f6174, []byte(key))
+
+    ch.RLock()
+    defer ch.RUnlock()
+    i := int(jumpConsistentHash(sum64, int32(len(ch.nodes))))
+    return ch.nodes[i]
+}
+
+// A Fast, Minimal Memory, Consistent Hash Algorithm:
+// https://arxiv.org/ftp/arxiv/papers/1406/1406.2294.pdf
+func jumpConsistentHash(key uint64, num_buckets int32) int32 {
+    var b, j int64 = -1, 0
+    for j < int64(num_buckets) {
+        b = j
+        key = key*2862933555777941757 + 1
+        j = (b + 1) * int64((1<<31)/(key>>33)+1)
+    }
+    return int32(b)
+}
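
for reference, the /1/lb/nodes management endpoint above can be driven with a
tiny client like the sketch below (not part of this commit; it assumes the
balancer is listening on localhost:8081 as in main above, and the node address
is a made-up example):

    package main

    import (
        "bytes"
        "io"
        "log"
        "net/http"
        "os"
    )

    func main() {
        // register a functions node with a running balancer; :8081 matches the
        // hardcoded listen address above, 127.0.0.1:8080 is an example node
        body := bytes.NewBufferString(`{"node": "127.0.0.1:8080"}`)
        req, err := http.NewRequest("PUT", "http://localhost:8081/1/lb/nodes", body)
        if err != nil {
            log.Fatal(err)
        }
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()
        io.Copy(os.Stdout, resp.Body) // expect {"msg":"node added"}
    }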

lb/lb_test.go (new file): 72 lines added

@@ -0,0 +1,72 @@
package main

import "testing"

func TestCHAdd(t *testing.T) {
    var ch consistentHash

    nodes := []string{"1", "2", "3"}
    for _, n := range nodes {
        ch.add(n)
    }

    if len(ch.nodes) != 3 {
        t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
    }

    // test dupes don't get added
    for _, n := range nodes {
        ch.add(n)
    }

    if len(ch.nodes) != 3 {
        t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
    }
}

func TestCHRemove(t *testing.T) {
    var ch consistentHash

    nodes := []string{"1", "2", "3"}
    for _, n := range nodes {
        ch.add(n)
    }

    if len(ch.nodes) != 3 {
        t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
    }

    ch.remove("4")
    if len(ch.nodes) != 3 {
        t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
    }

    ch.remove("3")
    if len(ch.nodes) != 2 {
        t.Fatal("nodes list should be len of 2, got:", len(ch.nodes))
    }

    ch.remove("3")
    if len(ch.nodes) != 2 {
        t.Fatal("nodes list should be len of 2, got:", len(ch.nodes))
    }
}

func TestCHGet(t *testing.T) {
    var ch consistentHash

    nodes := []string{"1", "2", "3"}
    for _, n := range nodes {
        ch.add(n)
    }

    if len(ch.nodes) != 3 {
        t.Fatal("nodes list should be len of 3, got:", len(ch.nodes))
    }

    keys := []string{"a", "b", "c"}
    for _, k := range keys {
        _ = ch.get(k)
        // testing this doesn't panic basically? could test distro but meh
    }
}
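
the 'could test distro but meh' comment could later become something along
these lines; a sketch only, not part of this commit (TestCHConsistency, the
example addresses/keys, and the extra "fmt" import are all assumptions):

    // checks that a key always maps to the same node while the node set is
    // unchanged, and that keys spread across all nodes. needs "fmt" imported.
    func TestCHConsistency(t *testing.T) {
        var ch consistentHash
        for _, n := range []string{"a:8080", "b:8080", "c:8080"} { // example addresses
            ch.add(n)
        }

        counts := make(map[string]int)
        for i := 0; i < 1000; i++ {
            key := fmt.Sprintf("/r/app/route-%d", i) // example route paths
            first := ch.get(key)
            if again := ch.get(key); again != first {
                t.Fatalf("key %q moved from %q to %q", key, first, again)
            }
            counts[first]++
        }

        // with 1000 distinct keys over 3 nodes, every node should see some traffic
        if len(counts) != 3 {
            t.Fatal("expected keys on all 3 nodes, got:", len(counts))
        }
    }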

(deleted file)

@@ -1,157 +0,0 @@
package lb

import (
    "context"
    "errors"
    "net"
    "net/http"
    "sync"
    "time"

    "github.com/golang/groupcache/singleflight"
)

// ErrNoFallbackNodeFound happens when the fallback routine does not manage to
// find a TCP reachable node in alternative to the chosen one.
var ErrNoFallbackNodeFound = errors.New("no fallback node found - whole cluster seems offline")

// FallbackRoundTripper implements http.RoundTripper in a way that when an
// outgoing request does not manage to succeed with its original target host,
// it fallsback to a list of alternative hosts. Internally it keeps a list of
// dead hosts, and pings them until they are back online, diverting traffic
// back to them. This is meant to be used by ConsistentHashReverseProxy().
type FallbackRoundTripper struct {
    nodes []string
    sf    singleflight.Group

    mu sync.Mutex
    // a list of offline servers that must be rechecked to see when they
    // get back online. If a server is in this list, it must have a fallback
    // available to which requests are sent.
    fallback map[string]string
}

// NewRoundTripper creates a new FallbackRoundTripper and triggers the internal
// host TCP health checks.
func NewRoundTripper(ctx context.Context, nodes []string) *FallbackRoundTripper {
    frt := &FallbackRoundTripper{
        nodes:    nodes,
        fallback: make(map[string]string),
    }
    go frt.checkHealth(ctx)
    return frt
}

func (f *FallbackRoundTripper) checkHealth(ctx context.Context) {
    tick := time.NewTicker(1 * time.Second)
    defer tick.Stop()
    for {
        select {
        case <-ctx.Done():
            return

        case <-tick.C:
            f.mu.Lock()
            if len(f.fallback) == 0 {
                f.mu.Unlock()
                continue
            }
            fallback := make(map[string]string)
            for k, v := range f.fallback {
                fallback[k] = v
            }
            f.mu.Unlock()

            updatedlist := make(map[string]string)
            for host, target := range fallback {
                if !f.ping(host) {
                    updatedlist[host] = target
                }
            }

            f.mu.Lock()
            f.fallback = make(map[string]string)
            for k, v := range updatedlist {
                f.fallback[k] = v
            }
            f.mu.Unlock()
        }
    }
}

func (f *FallbackRoundTripper) ping(host string) bool {
    conn, err := net.Dial("tcp", host)
    if err != nil {
        return false
    }
    conn.Close()
    return true
}

func (f *FallbackRoundTripper) fallbackHost(targetHost, failedFallback string) string {
    detected, err := f.sf.Do(targetHost, func() (interface{}, error) {
        for _, node := range f.nodes {
            if node != targetHost && node != failedFallback && f.ping(node) {
                f.mu.Lock()
                f.fallback[targetHost] = node
                f.mu.Unlock()
                return node, nil
            }
        }
        return "", ErrNoFallbackNodeFound
    })
    if err != nil {
        return ""
    }
    return detected.(string)
}

// RoundTrip implements http.RoundTrip. It tried to fullfil an *http.Request to
// its original target host, falling back to a list of nodes in case of failure.
// After the first failure, it consistently delivers traffic to the fallback
// host, until original host is back online. If no fallback node is available,
// it fails with ErrNoFallbackNodeFound. In case of cascaded failure, that is,
// the fallback node is also offline, it will look for another online host.
func (f *FallbackRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
    targetHost := req.URL.Host

    f.mu.Lock()
    fallback, ok := f.fallback[targetHost]
    f.mu.Unlock()
    if ok {
        req.URL.Host = fallback
        resp, err := f.callNode(req)
        if err == nil {
            return resp, err
        }

        fallback := f.fallbackHost(targetHost, fallback)
        if fallback == "" {
            return nil, ErrNoFallbackNodeFound
        }
        req.URL.Host = fallback
        return f.callNode(req)
    }

    resp, err := f.callNode(req)
    if err == nil {
        return resp, err
    }

    fallback = f.fallbackHost(targetHost, "")
    if fallback == "" {
        return nil, ErrNoFallbackNodeFound
    }
    req.URL.Host = fallback
    return f.callNode(req)
}

func (f *FallbackRoundTripper) callNode(req *http.Request) (*http.Response, error) {
    requestURI := req.RequestURI
    req.RequestURI = ""
    resp, err := http.DefaultClient.Do(req)
    if err == nil {
        resp.Request.RequestURI = requestURI
    }
    return resp, err
}